├── tests ├── scripts │ ├── query_5.sql │ ├── query_1.sql │ ├── query_2.sql │ ├── query_3.sql │ ├── query_4.sql │ ├── data_load.sql │ └── init.sql ├── main.py ├── utils.py ├── test_functions_profile.py └── test_trace_session.py ├── .gitignore ├── Docker ├── configure.sh ├── README.md └── Dockerfile ├── pg_uprobe.control ├── CMakeLists.txt ├── src ├── include │ ├── json_to_jsonbvalue_parser.h │ ├── trace_parsing.h │ ├── trace_planning.h │ ├── lockmanager_trace.h │ ├── uprobe_shared_config.h │ ├── trace_session.h │ ├── trace_wait_events.h │ ├── custom_uprobe_interface.h │ ├── trace_lock_on_buffers.h │ ├── uprobe_internal.h │ ├── trace_file.h │ ├── uprobe_factory.h │ ├── trace_execute_nodes.h │ ├── count_uprobes.h │ ├── uprobe_attach_interface.h │ ├── list.h │ └── uprobe_message_buffer.h ├── trace_parsing.c ├── uprobe_factory.c ├── trace_file.c ├── list.c ├── trace_planning.c ├── uprobe_collector.c ├── uprobe_shared_config.c ├── json_to_jsonbvalue_parser.c ├── uprobe_message_buffer.c ├── trace_wait_events.c ├── lockmanager_trace.c ├── uprobe_internal.c └── trace_lock_on_buffers.c ├── doc ├── json_schema │ ├── execution_event.json │ ├── trace_json_schema.json │ ├── trace_data_in_plan.json │ ├── wait_event.json │ ├── expr_node_stat.json │ ├── execution_event_simple.json │ ├── one_query.json │ └── locks_on_buffers.json ├── example_trace_session.md ├── example_profile_func.md ├── profile_func.md └── trace_session.md ├── CHANGELOG.md ├── Makefile ├── pg_uprobe--0.3.sql ├── gen_node_names_array.py ├── frida.cmake └── README.md /tests/scripts/query_5.sql: -------------------------------------------------------------------------------- 1 | SELECT p.name, p.stock 2 | FROM products p 3 | WHERE p.stock < 50; -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | build 3 | third_party 4 | *.bc 5 | *.o 6 | *.so 7 | node_names.h 8 | 
tests/__pycache__ 9 | -------------------------------------------------------------------------------- /Docker/configure.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "shared_preload_libraries = 'pg_uprobe'" >> /var/lib/postgresql/data/postgresql.conf 3 | -------------------------------------------------------------------------------- /tests/scripts/query_1.sql: -------------------------------------------------------------------------------- 1 | SELECT u.first_name, u.last_name, o.order_date, o.total_amount, o.status 2 | FROM users u 3 | JOIN orders o ON u.id = o.user_id 4 | WHERE o.total_amount > 70000.00; -------------------------------------------------------------------------------- /pg_uprobe.control: -------------------------------------------------------------------------------- 1 | # pg_uprobe extension 2 | comment = 'Measure postgres functions execution time' 3 | default_version = '0.3' 4 | module_pathname = '$libdir/pg_uprobe' 5 | relocatable = true 6 | -------------------------------------------------------------------------------- /tests/scripts/query_2.sql: -------------------------------------------------------------------------------- 1 | SELECT p.name, SUM(oi.quantity) AS total_quantity 2 | FROM products p 3 | JOIN order_items oi ON p.id = oi.product_id 4 | GROUP BY p.id, p.name 5 | HAVING SUM(oi.quantity) > 1; -------------------------------------------------------------------------------- /tests/scripts/query_3.sql: -------------------------------------------------------------------------------- 1 | SELECT u.first_name, u.last_name, SUM(o.total_amount) AS total_spent 2 | FROM users u 3 | LEFT JOIN orders o ON u.id = o.user_id 4 | GROUP BY u.id, u.first_name, u.last_name; -------------------------------------------------------------------------------- /tests/scripts/query_4.sql: -------------------------------------------------------------------------------- 1 | SELECT u.first_name, 
u.last_name, COUNT(*) AS number_of_orders 2 | FROM users u 3 | JOIN orders o ON u.id = o.user_id 4 | WHERE o.order_date BETWEEN '2023-01-01' AND '2023-12-31' 5 | GROUP BY u.id, u.first_name, u.last_name; -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.15) 2 | set(CMAKE_C_STANDARD 99) 3 | if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.24.0") 4 | cmake_policy(SET CMP0135 NEW) 5 | endif() 6 | 7 | project( 8 | "load_frida" 9 | VERSION 1.0 10 | LANGUAGES C 11 | ) 12 | 13 | include(frida.cmake) -------------------------------------------------------------------------------- /src/include/json_to_jsonbvalue_parser.h: -------------------------------------------------------------------------------- 1 | #ifndef JSON_TO_JSONBVALUE_PARSER_H 2 | #define JSON_TO_JSONBVALUE_PARSER_H 3 | #include "postgres.h" 4 | #include "utils/jsonb.h" 5 | 6 | extern JsonbValue *jsonToJsonbValue(char *json, size_t len); 7 | 8 | #endif /* JSON_TO_JSONBVALUE_PARSER_H */ 9 | -------------------------------------------------------------------------------- /doc/json_schema/execution_event.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "$id": "execution_event.json", 4 | "title": "Execution event", 5 | "type": "object", 6 | "oneOf": [ 7 | {"$ref": "execution_event_simple.json"}, 8 | {"$ref": "one_query.json"} 9 | ] 10 | } -------------------------------------------------------------------------------- /src/include/trace_parsing.h: -------------------------------------------------------------------------------- 1 | #ifndef TRACE_PARSING_H 2 | #define TRACE_PARSING_H 3 | 4 | #include "uprobe_attach_interface.h" 5 | 6 | extern UprobeAttachInterface *ParsingUprobeGet(void); 7 | 8 | extern void ParsingWriteData(void); 9 | 10 | extern void 
ParsingClearData(void); 11 | 12 | #endif /* TRACE_PARSING_H */ 13 | -------------------------------------------------------------------------------- /src/include/trace_planning.h: -------------------------------------------------------------------------------- 1 | #ifndef TRACE_PLANNING_H 2 | #define TRACE_PLANNING_H 3 | 4 | #include "uprobe_attach_interface.h" 5 | 6 | extern void PlanningUprobesGet(UprobeAttachInterface **resUprobes, MemoryContext context); 7 | 8 | extern void PlanningWriteData(void); 9 | 10 | extern void PlanningClearData(void); 11 | 12 | #endif /* TRACE_PLANNING_H */ 13 | -------------------------------------------------------------------------------- /Docker/README.md: -------------------------------------------------------------------------------- 1 | git clone git@github.com:postgrespro/pg_uprobe.git 2 | 3 | docker build -t pg16-pg_uprobe:latest . 4 | 5 | docker run -d \ 6 | --name pg16-pg_uprobe \ 7 | -e POSTGRES_USER=postgres \ 8 | -e POSTGRES_PASSWORD=postgres \ 9 | -e POSTGRES_DB=postgres \ 10 | -p 5432:5432 \ 11 | -v ./pg16-data:/var/lib/postgresql/data \ 12 | pg16-pg_uprobe:latest 13 | 14 | 15 | docker exec -it pg16-pg_uprobe sh -------------------------------------------------------------------------------- /doc/json_schema/trace_json_schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "title": "Trace session output", 4 | "type": "object", 5 | "properties": { 6 | "pid": { 7 | "type": "number" 8 | }, 9 | "queries": { 10 | "type": "array", 11 | "items": { 12 | "$ref": "one_query.json" 13 | } 14 | } 15 | }, 16 | "required": [ 17 | "pid", 18 | "queries" 19 | ] 20 | } -------------------------------------------------------------------------------- /src/include/lockmanager_trace.h: -------------------------------------------------------------------------------- 1 | #ifndef LOCKMANAGER_TRACE_H 2 | #define LOCKMANAGER_TRACE_H 3 | 4 | 
#include "custom_uprobe_interface.h" 5 | 6 | 7 | extern UprobeAttachInterface *LWLockAcquireInit(const char *symbol); 8 | 9 | extern UprobeAttachInterface *LWLockReleaseInit(const char *symbol); 10 | 11 | extern UprobeStorage *LockManagerStorageInit(const char *symbol); 12 | 13 | extern UprobeStorage *NullStorageInit(const char *symbol); 14 | 15 | #endif /* LOCKMANAGER_TRACE_H */ 16 | -------------------------------------------------------------------------------- /src/include/uprobe_shared_config.h: -------------------------------------------------------------------------------- 1 | #ifndef UPROBE_SHARED_CONFIG_H 2 | #define UPROBE_SHARED_CONFIG_H 3 | 4 | 5 | typedef void (*LoadFromConfigApplyFunc) (const char *func, const char *type); 6 | 7 | 8 | extern void PGUprobeSaveInSharedConfig(char *func, char *type); 9 | 10 | 11 | extern void PGUprobeLoadFromSharedConfig(LoadFromConfigApplyFunc applyFunc); 12 | 13 | 14 | extern void PGUprobeDeleteFromSharedConfig(const char *func); 15 | 16 | #endif /* UPROBE_SHARED_CONFIG_H */ 17 | -------------------------------------------------------------------------------- /src/include/trace_session.h: -------------------------------------------------------------------------------- 1 | #ifndef TRACE_SESSION_H 2 | #define TRACE_SESSION_H 3 | #include "postgres.h" 4 | #include "executor/executor.h" 5 | 6 | typedef enum TraceDataWriteMode 7 | { 8 | TEXT_WRITE_MODE, 9 | JSON_WRITE_MODE 10 | } TraceDataWriteMode; 11 | 12 | extern int writeMode; 13 | extern bool isExecuteTime; 14 | 15 | extern void SessionTraceStart(void); 16 | 17 | extern void SessionTraceStop(bool closeFile); 18 | 19 | extern void SessionTraceInit(void); 20 | 21 | #endif /* TRACE_SESSION_H */ 22 | -------------------------------------------------------------------------------- /doc/json_schema/trace_data_in_plan.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | 
"$id": "trace_data_in_plan.json", 4 | "title": "Trace data in explain output", 5 | "type": "object", 6 | "properties": { 7 | "maxTime": { 8 | "type": "number" 9 | }, 10 | "totalCalls": { 11 | "type": "number" 12 | }, 13 | "totalTimeSum": { 14 | "type": "number" 15 | } 16 | }, 17 | "required": [ 18 | "maxTime", 19 | "totalCalls", 20 | "totalTimeSum" 21 | ] 22 | } -------------------------------------------------------------------------------- /Docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM postgres:16 2 | 3 | RUN apt-get update && \ 4 | apt-get install -y \ 5 | build-essential \ 6 | postgresql-server-dev-16 \ 7 | git \ 8 | gcc \ 9 | python3 \ 10 | libkrb5-dev \ 11 | krb5-multidev \ 12 | cmake \ 13 | && rm -rf /var/lib/apt/lists/* 14 | 15 | COPY ./pg_uprobe/ /usr/src/pg_uprobe 16 | 17 | WORKDIR /usr/src/pg_uprobe 18 | RUN make USE_PGXS=1 PG_CONFIG=pg_config install 19 | 20 | COPY ./configure.sh /docker-entrypoint-initdb.d/ 21 | 22 | 23 | EXPOSE 5432 24 | CMD ["postgres"] 25 | -------------------------------------------------------------------------------- /doc/json_schema/wait_event.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "$id": "wait_event.json", 4 | "title": "Wait event stat object", 5 | "type": "object", 6 | "properties": { 7 | "name": { 8 | "type": "string" 9 | }, 10 | "count": { 11 | "type": "number" 12 | }, 13 | "timeSum": { 14 | "type": "string" 15 | }, 16 | "maxTime": { 17 | "type": "string" 18 | } 19 | }, 20 | "required": [ 21 | "name", 22 | "count", 23 | "timeSum", 24 | "maxTime" 25 | ] 26 | } -------------------------------------------------------------------------------- /doc/json_schema/expr_node_stat.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "$id": 
"expr_node_stat.json", 4 | "title": "Expression Node stat after execution", 5 | "type": "object", 6 | "properties": { 7 | "node": { 8 | "type": "string" 9 | }, 10 | "totalCalls": { 11 | "type": "number" 12 | }, 13 | "totalTimeSum": { 14 | "type": "string" 15 | }, 16 | "maxTime": { 17 | "type": "string" 18 | } 19 | }, 20 | "required": [ 21 | "node", 22 | "totalCalls", 23 | "totalTimeSum", 24 | "maxTime" 25 | ] 26 | } -------------------------------------------------------------------------------- /src/include/trace_wait_events.h: -------------------------------------------------------------------------------- 1 | #ifndef TRACE_WAIT_EVENTS_H 2 | #define TRACE_WAIT_EVENTS_H 3 | 4 | #include "postgres.h" 5 | #include "lib/stringinfo.h" 6 | 7 | #include "uprobe_attach_interface.h" 8 | 9 | 10 | extern UprobeAttachInterface *TraceWaitEventsUprobesGet(size_t *resSize); 11 | 12 | extern void TraceWaitEventsUprobesClean(void); 13 | 14 | extern bool TraceWaitEventDumpStat(StringInfo out); 15 | 16 | extern void TraceWaitEventsClearStat(void); 17 | 18 | extern void SignalWaitEventStart(uint64 time); 19 | 20 | extern void SignalWaitEventEnd(uint64 time); 21 | 22 | #endif /* TRACE_WAIT_EVENTS_H */ 23 | -------------------------------------------------------------------------------- /doc/json_schema/execution_event_simple.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "$id": "execution_event_simple.json", 4 | "title": "execution event schema for simple executor node", 5 | "type": "object", 6 | "properties": { 7 | "node": { 8 | "type": "string" 9 | }, 10 | "explain": { 11 | "type": "object", 12 | "properties": {}, 13 | "required": [] 14 | }, 15 | "executeTime": { 16 | "type": "string" 17 | }, 18 | "LWLockStat": { 19 | "type": "array", 20 | "items": { 21 | "$ref": "locks_on_buffers.json" 22 | } 23 | } 24 | }, 25 | "required": [ 26 | "node", 27 | "executeTime" 28 | ] 29 | } 
-------------------------------------------------------------------------------- /src/include/custom_uprobe_interface.h: -------------------------------------------------------------------------------- 1 | #ifndef CUSTOM_UPROBE_INTERFACE_H 2 | #define CUSTOM_UPROBE_INTERFACE_H 3 | #include "uprobe_attach_interface.h" 4 | 5 | struct UprobeStorage; 6 | 7 | typedef void (*StorageDeleteFunc) (struct UprobeStorage *storage, bool shouldWriteStat); 8 | 9 | typedef void (*StoragePutDataFunc) (struct UprobeStorage *storage, void *data); 10 | 11 | typedef void (*StorageWriteStat) (struct UprobeStorage *storage, bool shouldClearStat); 12 | 13 | /* abstract struct for all storages */ 14 | typedef struct UprobeStorage 15 | { 16 | StoragePutDataFunc putData; 17 | StorageWriteStat writeStat; 18 | StorageDeleteFunc delete; 19 | char *symbol; 20 | } UprobeStorage; 21 | 22 | #endif /* CUSTOM_UPROBE_INTERFACE_H */ 23 | -------------------------------------------------------------------------------- /src/include/trace_lock_on_buffers.h: -------------------------------------------------------------------------------- 1 | #ifndef TRACE_LOCK_ON_BUFFERS 2 | #define TRACE_LOCK_ON_BUFFERS 3 | #include "postgres.h" 4 | #include "lib/stringinfo.h" 5 | #include "utils/hsearch.h" 6 | 7 | #include "uprobe_attach_interface.h" 8 | 9 | 10 | extern void LockOnBuffersUprobesGet(MemoryContext context, UprobeAttachInterface **resUrpobesToAttach, bool shouldLogOnlySleep); 11 | 12 | 13 | extern bool LockOnBuffersTraceWriteStat(StringInfo stream, bool shouldClean); 14 | 15 | extern void LockOnBuffersTraceStatPush(void); 16 | 17 | extern void LockOnBuffersTraceStatPop(void); 18 | 19 | extern HTAB *LockOnBuffersTraceStatPopAndGet(void); 20 | 21 | extern void LockOnBuffersTraceWriteStatWithName(HTAB *data, const char *shortName); 22 | 23 | #endif /* TRACE_LOCK_ON_BUFFERS */ 24 | -------------------------------------------------------------------------------- /tests/main.py: 
-------------------------------------------------------------------------------- 1 | import testgres 2 | from testgres import PostgresNode 3 | 4 | import test_functions_profile 5 | import test_trace_session 6 | import utils 7 | 8 | def test_prepare(node: PostgresNode): 9 | node.pgbench_init(scale = "20") 10 | utils.node_load_custom_database(node) 11 | with node.connect("postgres", autocommit=True) as conn: 12 | conn.execute("create extension pg_uprobe") 13 | 14 | 15 | def run_tests(): 16 | with testgres.get_new_node().init() as node: 17 | node.append_conf("postgresql.conf", "shared_preload_libraries = 'pg_uprobe'") 18 | 19 | node.start() 20 | 21 | test_prepare(node) 22 | 23 | test_functions_profile.run_tests(node) 24 | 25 | test_trace_session.run_tests(node) 26 | 27 | 28 | 29 | 30 | if __name__ == '__main__': 31 | run_tests() -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## [v0.3] 4 | 5 | Improvements in Session Tracing: 6 | - Fixed PL/pgSQL function tracing – now works correctly. 7 | - Added PID at the start of the file for better trace identification. 8 | - Added query start time for precise timing analysis. 9 | - Improved format for nested queries (better readability and structure). 10 | - Updated documentation and JSON Schema for session trace output format. 11 | 12 | Other Changes: 13 | - Fixed stat_hist_uprobe SQL function (resolved known issues). 14 | - Added test suite for the extension: 15 | Tests are written in Python using the testgres library. 16 | Prerequisites: 17 | 1. Ensure PG_CONFIG environment variable points to the pg_config executable. 18 | 2. Run make python_tests to execute the tests. 19 | - Added FreeBSD support. 20 | - Upgraded Frida-Gum to version 17.1.5. 
-------------------------------------------------------------------------------- /tests/scripts/data_load.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO users (first_name, last_name, email, phone_number) 2 | VALUES 3 | ('Ivan', 'Ivanov', 'ivanov@example.com', '+79001234567'), 4 | ('Petr', 'Petrov', 'petrov@example.com', '+79111234567'), 5 | ('Sergei', 'Sergeev', 'sergeev@example.com', '+79221234567'); 6 | 7 | INSERT INTO products (name, description, price, stock) 8 | VALUES 9 | ('iPhone X', 'new iPhone X by Apple', 50000.00, 100), 10 | ('Samsung Galaxy S10', 'New Samsung phone', 40000.00, 150), 11 | ('Xiaomi Redmi Note 8 Pro', 'middle class Xiaomi phone', 25000.00, 200); 12 | 13 | INSERT INTO orders (user_id, total_amount, status) 14 | VALUES 15 | (1, 50000.00, 'NEW'), 16 | (2, 80000.00, 'PROCESSING'), 17 | (3, 75000.00, 'SHIPPED'); 18 | 19 | 20 | INSERT INTO order_items (order_id, product_id, quantity, unit_price) 21 | VALUES 22 | (1, 1, 1, 50000.00), 23 | (2, 2, 2, 40000.00), 24 | (3, 3, 3, 25000.00); -------------------------------------------------------------------------------- /src/include/uprobe_internal.h: -------------------------------------------------------------------------------- 1 | #ifndef UPROBE_INTERNAL_H 2 | #define UPROBE_INTERNAL_H 3 | 4 | #include "postgres.h" 5 | 6 | #include "uprobe_attach_interface.h" 7 | 8 | typedef enum 9 | { 10 | SUCCESS, INTERNAL_ERROR, CANNOT_FIND_SYMBOL, INVALID_NUMBER_OF_ARGS 11 | } UPROBE_INIT_RES; 12 | 13 | typedef struct Uprobe Uprobe; 14 | 15 | 16 | extern MemoryContext UprobeMemoryContext; 17 | 18 | 19 | extern void UprobeInternalInit(void); 20 | 21 | extern void UprobeInternalFini(void); 22 | 23 | extern UPROBE_INIT_RES UprobeInit(UprobeAttachInterface *uprobeAttach, Uprobe **uprobe); 24 | 25 | extern void UprobeDelete(Uprobe *uprobe); 26 | 27 | extern int UprobeCompare(Uprobe *uprobe, char *func); 28 | 29 | extern const char *UprobeGetFunc(Uprobe *uprobe); 
30 | 31 | extern void UprobeCallTimedCallback(Uprobe *uprobe); 32 | 33 | extern const UprobeAttachInterface *UprobeGetAttachInterface(Uprobe *uprobe); 34 | 35 | #endif /* UPROBE_INTERNAL_H */ 36 | -------------------------------------------------------------------------------- /src/include/trace_file.h: -------------------------------------------------------------------------------- 1 | #ifndef TRACE_FILE_H 2 | #define TRACE_FILE_H 3 | #include "postgres.h" 4 | 5 | extern FILE *traceFile; 6 | extern char *dataDir; 7 | extern int traceFileLimit; 8 | extern size_t currentSize; 9 | 10 | extern void TraceFileDeclareGucVariables(void); 11 | 12 | extern bool OpenTraceSessionFile(bool throwOnError); 13 | 14 | extern void CloseTraceSessionFile(void); 15 | 16 | #define TRACE_FILE_LIMIT_BT 1024 * 1024 * (size_t) traceFileLimit 17 | 18 | pg_attribute_printf(1, 2) static inline int 19 | TracePrintf(char *fmt,...) 20 | { 21 | if (TRACE_FILE_LIMIT_BT > currentSize) 22 | { 23 | va_list args; 24 | 25 | va_start(args, fmt); 26 | 27 | /* 28 | * We estimate that the size of written data is not that big, so we won't 29 | * need to worry a lot about going beyond TRACE_FILE_LIMIT_BT. 
30 | */ 31 | currentSize += pg_vfprintf(traceFile, fmt, args); 32 | va_end(args); 33 | } 34 | return 0; 35 | } 36 | 37 | #endif /* TRACE_FILE_H */ 38 | -------------------------------------------------------------------------------- /src/include/uprobe_factory.h: -------------------------------------------------------------------------------- 1 | #ifndef UPROBE_FACTORY_H 2 | #define UPROBE_FACTORY_H 3 | 4 | #include "uprobe_attach_interface.h" 5 | #include "custom_uprobe_interface.h" 6 | 7 | typedef enum 8 | { 9 | INVALID_TYPE, 10 | TIME, HIST, MEM, LOCK_ACQUIRE, LOCK_RELEASE 11 | } UprobeAttachType; 12 | 13 | 14 | typedef struct UprobeAttach 15 | { 16 | UprobeAttachType type; 17 | UprobeAttachInterface *impl; 18 | } UprobeAttach; 19 | 20 | 21 | typedef struct UprobeStorage *(*StorageInitFunc) (const char *symbol); 22 | 23 | typedef struct UprobeAttachInterface *(*UprobeInterfaceInitFunc) (const char *symbol); 24 | 25 | 26 | extern void CreateUprobeAttachForType(const char *type, const char *symbol, UprobeAttach *UprobeAttach); 27 | 28 | extern const char *GetCharNameForUprobeAttachType(UprobeAttachType type); 29 | 30 | extern UprobeStorage *GetUprobeStorageForType(UprobeAttachType type, const char *symbol); 31 | 32 | extern UprobeAttachType GetTypeByCharName(const char *name); 33 | 34 | #endif /* UPROBE_FACTORY_H */ 35 | -------------------------------------------------------------------------------- /src/include/trace_execute_nodes.h: -------------------------------------------------------------------------------- 1 | #ifndef TRACE_EXECUTE_NODES_H 2 | #define TRACE_EXECUTE_NODES_H 3 | #include "postgres.h" 4 | #include "nodes/execnodes.h" 5 | 6 | #include "list.h" 7 | #include "uprobe_attach_interface.h" 8 | 9 | 10 | extern bool isFirstNodeCall; 11 | 12 | extern void ExecutorTraceUprobesGet(UprobeAttachInterface **uprobes, MemoryContext context, bool shouldTraceLWLocksForEachNode); 13 | 14 | extern void ExecutorTraceDumpAndClearStat(QueryDesc *queryDesc, char 
*plan); 15 | 16 | extern void ExecutorTraceStop(void); 17 | 18 | extern void InitExecutorNodes(PlanState *plan, UprobeList *stringPlanParts); 19 | 20 | extern void ExecuteNodesStateNew(QueryDesc *query); 21 | 22 | extern void ExecuteNodesStatePush(QueryDesc *query); 23 | 24 | extern void ExecuteNodesStatePop(void); 25 | 26 | extern void ExecuteNodesStateDelete(QueryDesc *query); 27 | 28 | extern void ExecuteNodesDeleteRegister(QueryDesc* query); 29 | 30 | extern void ExecuteNodesStateClean(void); 31 | 32 | extern bool ExecuteNodesStateNeedInit(void); 33 | 34 | #endif /* TRACE_EXECUTE_NODES_H */ 35 | -------------------------------------------------------------------------------- /src/include/count_uprobes.h: -------------------------------------------------------------------------------- 1 | #ifndef COUNT_UPROBES_H 2 | #define COUNT_UPROBES_H 3 | 4 | #include "uprobe_attach_interface.h" 5 | #include "custom_uprobe_interface.h" 6 | 7 | /* works like hist() in bpftrace */ 8 | typedef struct 9 | { 10 | size_t *histArray; 11 | size_t histArraySize; 12 | size_t totalCalls; 13 | double start; 14 | double stop; 15 | double step; 16 | } UprobeAttachHistStat; 17 | 18 | 19 | typedef struct 20 | { 21 | size_t numCalls; 22 | size_t avgTime; 23 | } UprobeAttachTimeStat; 24 | 25 | 26 | extern UprobeAttachInterface *UprobeAttachTimeInit(const char *symbol); 27 | extern UprobeStorage *UprobeStorageTimeInit(const char *symbol); 28 | extern UprobeAttachTimeStat *UprobeAttachTimeGetStat(const UprobeAttachInterface *uprobe); 29 | 30 | extern UprobeAttachInterface *UprobeAttachHistInit(const char *symbol); 31 | extern UprobeStorage *UprobeStorageHistInit(const char *symbol); 32 | extern UprobeAttachHistStat *UprobeAttachHistGetStat(const UprobeAttachInterface *uprobe, double start, double stop, double step); 33 | extern UprobeAttachHistStat *UprobeAttachHistGetStatSimple(const UprobeAttachInterface *uprobe); 34 | 35 | extern UprobeAttachInterface *UprobeAttachMemInit(const char 
*symbol); 36 | extern UprobeStorage *UprobeStorageMemInit(const char *symbol); 37 | 38 | #endif /* COUNT_UPROBES_H */ 39 | -------------------------------------------------------------------------------- /src/include/uprobe_attach_interface.h: -------------------------------------------------------------------------------- 1 | #ifndef UPROBE_ATTACH_INTERFACE_H 2 | #define UPROBE_ATTACH_INTERFACE_H 3 | #include "postgres.h" 4 | 5 | /* 6 | * Interface for attaching uprobes. 7 | * 8 | * inFunc will be called when target symbol is called. 9 | * prototype for it is void inFunc(void* data, size_t arg1, size_t arg2); 10 | * you can add up to 8 args and you must specify the amount in field numArgs 11 | * retFunc will be called when target symbol returns. 12 | * prototype for it is void retFunc(void* data, size_t retArg); 13 | * retArg will be passed if field needRetVal is true. 14 | * timedCallback could be called from time to time. Common usage is sending data on shared uprobes 15 | * cleanFunc will be called when this uprobe is deleted in this process. 
16 | */ 17 | 18 | struct UprobeAttachInterface; 19 | 20 | typedef void (*UprobeAttachCleanFunc) (struct UprobeAttachInterface *); 21 | 22 | typedef void (*UprobeAttachTimedFunc) (struct UprobeAttachInterface *); 23 | 24 | typedef struct UprobeAttachInterface 25 | { 26 | void *inFunc; 27 | void *retFunc; 28 | UprobeAttachTimedFunc timedCallback; 29 | UprobeAttachCleanFunc cleanFunc; 30 | void *data; 31 | char *targetSymbol; 32 | int numArgs; 33 | bool needRetVal; 34 | } UprobeAttachInterface; 35 | 36 | #endif /* UPROBE_ATTACH_INTERFACE_H */ 37 | -------------------------------------------------------------------------------- /src/include/list.h: -------------------------------------------------------------------------------- 1 | #ifndef LIST_H 2 | #define LIST_H 3 | #include "postgres.h" 4 | 5 | typedef int (*CompareFunction) (const void *, const void *); 6 | 7 | typedef struct ListItem ListItem; 8 | 9 | struct ListItem 10 | { 11 | void *value; 12 | ListItem *next; 13 | ListItem *prev; 14 | }; 15 | 16 | typedef struct 17 | { 18 | ListItem *head; 19 | ListItem *tail; 20 | size_t ListSize; 21 | 22 | MemoryContext memoryContext; 23 | CompareFunction comparator; 24 | } UprobeList; 25 | 26 | extern void ListInit(UprobeList **list, CompareFunction comparator, MemoryContext memoryContext); 27 | extern void ListAdd(UprobeList *list, void *value); 28 | extern void *ListPop(UprobeList *list, void *value); 29 | 30 | extern void *ListFind(UprobeList *list, void *value); 31 | 32 | extern void *ListPopLast(UprobeList *list); 33 | 34 | extern void *ListPopFirst(UprobeList *list); 35 | 36 | extern size_t ListSize(UprobeList *list); 37 | extern bool ListContains(UprobeList *list, void *value); 38 | 39 | extern void ListMakeEmpty(UprobeList *list); 40 | extern void ListFree(UprobeList *list); 41 | 42 | #define LIST_FOREACH(list, iterator) \ 43 | for(ListItem *iterator = (list) ? 
(list)->head : NULL; ((void *) iterator) != NULL; (iterator) = (iterator)->next) \ 44 | 45 | #define LIST_FOREACH_REVERSE(list, iterator) \ 46 | for(ListItem *iterator = (list) ? (list)->tail : NULL; ((void *) iterator) != NULL; (iterator) = (iterator)->prev) \ 47 | 48 | #define LIST_LAST(list) (list) ? ((list)->tail ? (list)->tail->value : NULL) : NULL 49 | 50 | #endif /* LIST_H */ 51 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # contrib/pg_uprobe/Makefile 2 | 3 | # cmake install frida 4 | RUN_CMAKE_TO_INSTALL_FRIDA_0 := $(shell mkdir -p build ) 5 | RUN_CMAKE_TO_INSTALL_FRIDA_1 := $(shell cmake -B ./build -S .) 6 | RUN_CMAKE_TO_INSTALL_FRIDA_2 := $(shell cmake --build ./build) 7 | PG_CFLAGS += -I./build/FridaGum-prefix/src/FridaGum 8 | PG_CPPFLAGS += -I./build/FridaGum-prefix/src/FridaGum 9 | SHLIB_LINK += ./build/FridaGum-prefix/src/FridaGum/libfrida-gum.a 10 | 11 | MODULE_big = pg_uprobe 12 | OBJS = \ 13 | $(WIN32RES) \ 14 | src/pg_uprobe.o \ 15 | src/uprobe_internal.o \ 16 | src/list.o \ 17 | src/uprobe_collector.o \ 18 | src/uprobe_message_buffer.o \ 19 | src/uprobe_shared_config.o \ 20 | src/count_uprobes.o \ 21 | src/uprobe_factory.o \ 22 | src/trace_execute_nodes.o \ 23 | src/trace_lock_on_buffers.o \ 24 | src/trace_parsing.o \ 25 | src/trace_planning.o \ 26 | src/trace_session.o \ 27 | src/trace_wait_events.o \ 28 | src/json_to_jsonbvalue_parser.o \ 29 | src/lockmanager_trace.o \ 30 | src/trace_file.o 31 | 32 | PG_CFLAGS += -I./src/include 33 | PG_CPPFLAGS += -I./src/include 34 | PGFILEDESC = "pg_uprobe - allows measuring postgres functions execution time" 35 | 36 | EXTENSION = pg_uprobe 37 | DATA = pg_uprobe--0.3.sql 38 | 39 | REGRESS = pg_uprobe 40 | 41 | SHLIB_LINK += $(filter -lm, $(LIBS)) 42 | EXTRA_CLEAN = node_names.h 43 | 44 | ifdef USE_PGXS 45 | PG_CONFIG ?= pg_config 46 | PGXS := $(shell $(PG_CONFIG) --pgxs) 47 | 
PG_INCLUDE_DIR = $(shell $(PG_CONFIG) --includedir-server) 48 | include $(PGXS) 49 | else 50 | subdir = contrib/pg_uprobe 51 | top_builddir = ../.. 52 | include $(top_builddir)/src/Makefile.global 53 | include $(top_srcdir)/contrib/contrib-global.mk 54 | PG_INCLUDE_DIR = ../../src/include 55 | endif 56 | GEN_LOG := $(shell python3 gen_node_names_array.py $(MAJORVERSION) $(PG_INCLUDE_DIR)/nodes node_names.h) 57 | 58 | 59 | python_tests: 60 | python3 ./tests/main.py -------------------------------------------------------------------------------- /src/include/uprobe_message_buffer.h: -------------------------------------------------------------------------------- 1 | #ifndef UPROBE_MESSAGE_BUFFER_H 2 | #define UPROBE_MESSAGE_BUFFER_H 3 | 4 | #include "postgres.h" 5 | #include "storage/latch.h" 6 | #include "utils/wait_event.h" 7 | 8 | #if PG_MAJORVERSION_NUM < 17 9 | #define END_OF_WRITE_STAT_EVENT WAIT_EVENT_MQ_INTERNAL 10 | #else 11 | #define END_OF_WRITE_STAT_EVENT WAIT_EVENT_MESSAGE_QUEUE_INTERNAL 12 | #endif 13 | 14 | #define ESTIMATE_MESSAGE_SIZE (sizeof(Message) + 255) 15 | 16 | #define MESSAGEBUFFER_SIZE (ESTIMATE_MESSAGE_SIZE * 1024) 17 | 18 | #define MAX_SYMBOL_SIZE 1024 19 | 20 | #define MESSAGE_SYMBOL 0 21 | #define MESSAGE_DELETE_SHARED_UPROBE 1 22 | #define MESSAGE_NEW_SHARED_UPROBE 2 23 | #define MESSAGE_WRITE_STAT 3 24 | #define MESSAGE_CUSTOM 4 25 | 26 | 27 | /* base struct for all messages, should be first field in all messages */ 28 | typedef struct Message 29 | { 30 | uint16 type; 31 | uint16 size; 32 | } Message; 33 | 34 | 35 | typedef struct MessageNewSharedUprobe 36 | { 37 | Message base; 38 | uint8 uprobeType; 39 | } MessageNewSharedUprobe; 40 | 41 | 42 | typedef struct MessageWriteStat 43 | { 44 | Message base; 45 | bool shouldEmptyData; 46 | /* if not null will be set after stat is written */ 47 | Latch *latch; 48 | } MessageWriteStat; 49 | 50 | 51 | typedef struct MessageDeleteSharedUprobe 52 | { 53 | Message base; 54 | bool shouldWriteStat; 
55 | 56 | /* 57 | * if (not null and shouldWriteStat is true) will be set after stat is 58 | * written 59 | */ 60 | Latch *latch; 61 | } MessageDeleteSharedUprobe; 62 | 63 | 64 | typedef struct MessageSymbol 65 | { 66 | Message base; 67 | char symbol[FLEXIBLE_ARRAY_MEMBER]; 68 | } MessageSymbol; 69 | 70 | 71 | extern void MessageBufferRequest(void); 72 | 73 | 74 | extern void MessageBufferInit(void); 75 | 76 | extern void MessageBufferDelete(void); 77 | 78 | extern int MessageBufferPut(const Message *mes, uint32 n, char *symbol); 79 | 80 | extern int MessageBufferGet(Message *mes, uint32 bufferSize); 81 | 82 | 83 | #endif /* UPROBE_MESSAGE_BUFFER_H */ 84 | -------------------------------------------------------------------------------- /doc/json_schema/one_query.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "title": "Trace session output for one Query", 4 | "type": "object", 5 | "properties": { 6 | "parsingTime": { 7 | "type": "string" 8 | }, 9 | "planningTime": { 10 | "type": "string" 11 | }, 12 | "LWLockPlanning": { 13 | "type": "array", 14 | "items": { 15 | "$ref": "locks_on_buffers.json" 16 | } 17 | }, 18 | "executionStart": { 19 | "type": "string" 20 | }, 21 | "explain": { 22 | "type": "object", 23 | "properties": { 24 | "Query Text": { 25 | "type": "string" 26 | }, 27 | "Plan": { 28 | "type": "object", 29 | "properties": {}, 30 | "required": [] 31 | } 32 | }, 33 | "required": [ 34 | "Query Text", 35 | "Plan" 36 | ] 37 | }, 38 | "executionEvents": { 39 | "type": "array", 40 | "items": { 41 | "$ref": "execution_event.json" 42 | } 43 | }, 44 | "executionTime": { 45 | "type": "string" 46 | }, 47 | "executorNodeStatInPlan": { 48 | "type": "object", 49 | "properties": { 50 | "traceData": { 51 | "$ref": "trace_data_in_plan.json" 52 | } 53 | }, 54 | "required": [ 55 | "traceData" 56 | ] 57 | }, 58 | "exprNodeStat": { 59 | "type": "array", 60 | "items": { 61 | 
"$ref": "expr_node_stat.json" 62 | } 63 | }, 64 | "locksInsidePortalRun": { 65 | "type": "array", 66 | "items": { 67 | "$ref": "locks_on_buffers.json" 68 | } 69 | }, 70 | "locksOutsidePortalRun": { 71 | "type": "array", 72 | "items": { 73 | "$ref": "locks_on_buffers.json" 74 | } 75 | }, 76 | "waitEventStat": { 77 | "type": "array", 78 | "items": { 79 | "$ref": "wait_event.json" 80 | } 81 | } 82 | }, 83 | "required": [ 84 | "executionStart", 85 | "explain", 86 | "executionEvents", 87 | "executionTime", 88 | "executorNodeStatInPlan", 89 | "exprNodeStat" 90 | ] 91 | } -------------------------------------------------------------------------------- /tests/utils.py: -------------------------------------------------------------------------------- 1 | from testgres import PostgresNode 2 | import os 3 | import re 4 | global_test_variable = 1 5 | 6 | 7 | def node_read_file(node: PostgresNode, path : str) -> list[str]: 8 | file = open(node.data_dir + path) 9 | result = file.readlines() 10 | file.close() 11 | return result 12 | 13 | 14 | def node_read_file_one_line(node: PostgresNode, path: str): 15 | file = open(node.data_dir + path) 16 | result = file.read(-1) 17 | file.close() 18 | return result 19 | 20 | 21 | def node_delete_pg_uprobe_files(node: PostgresNode): 22 | dirEntries = os.listdir(node.data_dir + "/pg_uprobe/") 23 | for file in dirEntries: 24 | os.remove(node.data_dir + "/pg_uprobe/" + file) 25 | 26 | 27 | def save_to_file_with_pid(data: str, pid: int): 28 | file = open(f"data_{pid}.txt", "w") 29 | file.write(data) 30 | file.close() 31 | 32 | 33 | def test_wrapper(node: PostgresNode, test): 34 | global global_test_variable 35 | print("test", global_test_variable, ": ", test.__qualname__, ':', end = ' ', flush=True) 36 | global_test_variable += 1 37 | try: 38 | test(node) 39 | print("done") 40 | finally: 41 | node.execute("select delete_uprobe(func, false) from list_uprobes()") 42 | node_delete_pg_uprobe_files(node) 43 | node.execute("alter system set 
pg_uprobe.data_dir to DEFAULT") 44 | node.restart() 45 | 46 | 47 | def node_get_file_size(node: PostgresNode, path: str): 48 | return os.stat(node.data_dir + path).st_size 49 | 50 | 51 | def load_script(script_name: str): 52 | file = open(os.path.dirname(__file__) + "/scripts/" + script_name) 53 | result = file.read(-1) 54 | file.close() 55 | return result 56 | 57 | 58 | def load_scripts(): 59 | return [load_script("query_" + str(i) + ".sql") for i in range(1, 6)] 60 | 61 | 62 | 63 | def node_load_custom_database(node: PostgresNode): 64 | node.psql(query = load_script("init.sql")) 65 | node.psql(query = load_script("data_load.sql")) 66 | 67 | 68 | def extract_numbers_as_strings_from_time_point(input_string:str): 69 | date_part, time_part = input_string.split('T') 70 | year, month, day = date_part.split(':') 71 | hours, minutes, seconds = time_part.split(':') 72 | seconds, milliseconds = seconds.split('.') 73 | return [year, month, day, hours, minutes, seconds, milliseconds] 74 | -------------------------------------------------------------------------------- /doc/example_trace_session.md: -------------------------------------------------------------------------------- 1 | # Пример трассирования сеанса 2 | 3 | Подразумевается, что расширение уже установлено и настроено. Как установить и настроить расширение можно посмотреть в основном файле [README](./../README.md#установка). 4 | 5 | Установим следующие настройки для большей наглядности: 6 | - pg_uprobe.trace_write_mode='json' 7 | - pg_uprobe.write_only_sleep_lwlocks_stat='false' 8 | 9 | Создадим таблицу к которой будем посылать запросы и трассировать их. 
10 | ```sql 11 | create table t(id int primary key); 12 | 13 | insert into t values (1); 14 | ``` 15 | ## Трассирование текущего сеанса 16 | 17 | Начнём трассирование сеанса: 18 | ```sql 19 | select start_session_trace(); 20 | ``` 21 | Выполним запрос: 22 | 23 | ```sql 24 | select * from t where id = 1; 25 | ``` 26 | Остановим трассирование сеанса: 27 | ```sql 28 | select stop_session_trace(); 29 | ``` 30 | 31 | Результат трассирования будет находиться в каталоге, указанном в настройке `pg_uprobe.data_dir` (по умолчанию `$PGDATA/pg_uprobe`). 32 | 33 | В нашем случае получается следующий файл: [Пример результата](./example_trace_session.json) 34 | 35 | ## Трассирование сеанса по PID 36 | Прежде чем трассировать другой сеанс по PID, необходимо подготовить сам сеанс, для этого заходим в `psql` и узнаём номер PID, который мы будем трассировать: 37 | ```sql 38 | --Terminal-1 39 | 40 | select pg_backend_pid(); 41 | pg_backend_pid 42 | ---------------- 43 | 411064 44 | ``` 45 | 411064 — PID, который мы будем трассировать из другой сессии. 
46 | 47 | Для того, чтобы начать трассировать, необходимо открыть ещё один терминал `psql`: 48 | 49 | ```sql 50 | --Terminal-2 51 | 52 | -- Убедимся, что мы действительно в другом терминале 53 | select pg_backend_pid(); 54 | pg_backend_pid 55 | ---------------- 56 | 997809 57 | 58 | ``` 59 | 60 | Начнём трассирование сеанса: 61 | ```sql 62 | --Terminal-2 63 | 64 | select start_session_trace(411064); 65 | ``` 66 | В первом терминале выполним запрос, обратите внимание, что первый запрос не запишется, а трассирование начнётся только со второго запроса: 67 | ```sql 68 | --Terminal-1 69 | 70 | select 1; -- Запрос не запишется в результирующий файл 71 | 72 | select * from t where id = 1; 73 | ``` 74 | 75 | Остановим трассирование сеанса: 76 | ```sql 77 | select stop_session_trace(411064); 78 | ``` 79 | 80 | В этом случае получается аналогичный файл: [Пример результата](./example_trace_session.json) 81 | -------------------------------------------------------------------------------- /doc/json_schema/locks_on_buffers.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "$id": "locks_on_buffers.json", 4 | "title": "Schema for locks taken on buffer pages", 5 | "type": "object", 6 | "properties": { 7 | "bufferTag": { 8 | "type": "object", 9 | "properties": { 10 | "spcOid": { 11 | "type": "number" 12 | }, 13 | "spcName": { 14 | "type": "string" 15 | }, 16 | "dbOid": { 17 | "type": "number" 18 | }, 19 | "dbName": { 20 | "type": "string" 21 | }, 22 | "relNumber": { 23 | "type": "number" 24 | }, 25 | "relName": { 26 | "type": "string" 27 | }, 28 | "relKind": { 29 | "type": "string" 30 | }, 31 | "forkName": { 32 | "type": "string" 33 | }, 34 | "blockNumber": { 35 | "type": "number" 36 | } 37 | }, 38 | "required": [ 39 | "spcOid", 40 | "spcName", 41 | "dbOid", 42 | "dbName", 43 | "relNumber", 44 | "relName", 45 | "relKind", 46 | "forkName", 47 | "blockNumber" 48 | ] 49 | }, 
50 | "exclusive": { 51 | "type": "object", 52 | "properties": { 53 | "totalCalls": { 54 | "type": "number" 55 | }, 56 | "sleepCount": { 57 | "type": "number" 58 | }, 59 | "sleepTimeSum": { 60 | "type": "string" 61 | }, 62 | "maxSleepTime": { 63 | "type": "string" 64 | } 65 | }, 66 | "required": [ 67 | "totalCalls", 68 | "sleepCount", 69 | "sleepTimeSum", 70 | "maxSleepTime" 71 | ] 72 | }, 73 | "shared": { 74 | "type": "object", 75 | "properties": { 76 | "totalCalls": { 77 | "type": "number" 78 | }, 79 | "sleepCount": { 80 | "type": "number" 81 | }, 82 | "sleepTimeSum": { 83 | "type": "string" 84 | }, 85 | "maxSleepTime": { 86 | "type": "string" 87 | } 88 | }, 89 | "required": [ 90 | "totalCalls", 91 | "sleepCount", 92 | "sleepTimeSum", 93 | "maxSleepTime" 94 | ] 95 | } 96 | }, 97 | "required": [ 98 | "bufferTag", 99 | "exclusive", 100 | "shared" 101 | ] 102 | } -------------------------------------------------------------------------------- /src/trace_parsing.c: -------------------------------------------------------------------------------- 1 | #include "postgres.h" 2 | #include 3 | 4 | #include "trace_lock_on_buffers.h" 5 | #include "trace_session.h" 6 | #include "trace_file.h" 7 | 8 | #include "trace_parsing.h" 9 | 10 | typedef struct ParsingTrace 11 | { 12 | /* using 0 as invalid val */ 13 | uint64 parsingTime; 14 | HTAB *bufferLocksStat; 15 | uint64 startTime; 16 | } ParsingTrace; 17 | 18 | 19 | static ParsingTrace parsingTrace = 20 | { 21 | .startTime = 0,.bufferLocksStat = 0,.parsingTime = 0 22 | }; 23 | 24 | static void ParsingTraceInFunc(void *data); 25 | static void ParsingTraceRetFunc(void *data); 26 | static void ParsingTraceCleanFunc(UprobeAttachInterface *uprobe); 27 | 28 | 29 | static void 30 | ParsingTraceInFunc(void *data) 31 | { 32 | struct timespec time; 33 | 34 | LockOnBuffersTraceStatPush(); 35 | 36 | clock_gettime(CLOCK_MONOTONIC, &time); 37 | 38 | parsingTrace.startTime = time.tv_sec * 1000000000L + time.tv_nsec; 39 | } 40 | 41 | 42 | static 
void 43 | ParsingTraceRetFunc(void *data) 44 | { 45 | struct timespec time; 46 | 47 | clock_gettime(CLOCK_MONOTONIC, &time); 48 | 49 | parsingTrace.parsingTime = time.tv_sec * 1000000000L + time.tv_nsec - parsingTrace.startTime; 50 | 51 | if (parsingTrace.bufferLocksStat) 52 | hash_destroy(parsingTrace.bufferLocksStat); 53 | 54 | parsingTrace.bufferLocksStat = LockOnBuffersTraceStatPopAndGet(); 55 | } 56 | 57 | 58 | static void 59 | ParsingTraceCleanFunc(UprobeAttachInterface *uprobe) 60 | { 61 | pfree(uprobe); 62 | } 63 | 64 | 65 | UprobeAttachInterface * 66 | ParsingUprobeGet(void) 67 | { 68 | UprobeAttachInterface *res = (UprobeAttachInterface *) palloc0(sizeof(UprobeAttachInterface)); 69 | 70 | res->cleanFunc = ParsingTraceCleanFunc; 71 | res->inFunc = ParsingTraceInFunc; 72 | res->numArgs = 0; 73 | res->retFunc = ParsingTraceRetFunc; 74 | res->targetSymbol = "raw_parser"; 75 | return res; 76 | } 77 | 78 | 79 | void 80 | ParsingWriteData(void) 81 | { 82 | if (parsingTrace.parsingTime == 0) 83 | return; 84 | 85 | if (writeMode == TEXT_WRITE_MODE) 86 | TracePrintf("TRACE PARSE. parsingTime %lu nanosec\n", parsingTrace.parsingTime); 87 | else 88 | TracePrintf("\n\"parsingTime\": \"%lu nanosec\",\n", parsingTrace.parsingTime); 89 | 90 | LockOnBuffersTraceWriteStatWithName(parsingTrace.bufferLocksStat, "LWLockParsing"); 91 | 92 | ParsingClearData(); 93 | } 94 | 95 | void 96 | ParsingClearData(void) 97 | { 98 | if (parsingTrace.bufferLocksStat) 99 | hash_destroy(parsingTrace.bufferLocksStat); 100 | 101 | parsingTrace.bufferLocksStat = NULL; 102 | parsingTrace.parsingTime = 0; 103 | } 104 | -------------------------------------------------------------------------------- /pg_uprobe--0.3.sql: -------------------------------------------------------------------------------- 1 | -- complain if script is sourced in psql, rather than via CREATE EXTENSION 2 | \echo Use "CREATE EXTENSION pg_uprobe" to load this file. 
\quit 3 | 4 | 5 | CREATE FUNCTION set_uprobe(IN func text, IN uprobe_type text, IN is_shared boolean) 6 | RETURNS text 7 | AS 'MODULE_PATHNAME','set_uprobe' 8 | LANGUAGE C STABLE STRICT; 9 | 10 | CREATE FUNCTION delete_uprobe(IN func text, IN should_write_stat boolean) 11 | RETURNS void 12 | AS 'MODULE_PATHNAME','delete_uprobe' 13 | LANGUAGE C STABLE STRICT; 14 | 15 | CREATE FUNCTION stat_time_uprobe(IN func text) 16 | RETURNS text 17 | AS 'MODULE_PATHNAME','stat_time_uprobe' 18 | LANGUAGE C STABLE STRICT; 19 | 20 | CREATE FUNCTION stat_hist_uprobe( IN func text, IN start double precision, IN stop double precision, IN step double precision, 21 | OUT time_range text, 22 | OUT hist_entry text, 23 | OUT percent numeric(5,3)) 24 | RETURNS SETOF record 25 | AS 'MODULE_PATHNAME','stat_hist_uprobe' 26 | LANGUAGE C STABLE STRICT; 27 | 28 | CREATE FUNCTION stat_hist_uprobe( IN func text, 29 | OUT time_range text, 30 | OUT hist_entry text, 31 | OUT percent numeric(5,3)) 32 | RETURNS SETOF record 33 | AS 'MODULE_PATHNAME','stat_hist_uprobe_simple' 34 | LANGUAGE C STABLE STRICT; 35 | 36 | CREATE FUNCTION list_uprobes( 37 | OUT func text, 38 | OUT uprobe_type text, 39 | OUT is_shared boolean) 40 | RETURNS SETOF record 41 | AS 'MODULE_PATHNAME','list_uprobes' 42 | LANGUAGE C STABLE STRICT; 43 | 44 | CREATE FUNCTION dump_uprobe_stat(IN func text, IN should_empty_stat boolean) 45 | RETURNS void 46 | AS 'MODULE_PATHNAME','dump_uprobe_stat' 47 | LANGUAGE C STABLE STRICT; 48 | 49 | 50 | CREATE FUNCTION start_session_trace() 51 | RETURNS void 52 | AS 'MODULE_PATHNAME','start_session_trace' 53 | LANGUAGE C STABLE STRICT; 54 | 55 | CREATE FUNCTION stop_session_trace() 56 | RETURNS void 57 | AS 'MODULE_PATHNAME','stop_session_trace' 58 | LANGUAGE C STABLE STRICT; 59 | 60 | CREATE FUNCTION start_session_trace(IN pid INT) 61 | RETURNS void 62 | AS 'MODULE_PATHNAME','start_session_trace_pid' 63 | LANGUAGE C STABLE STRICT; 64 | 65 | 66 | CREATE FUNCTION stop_session_trace(IN pid INT) 67 | 
RETURNS void 68 | AS 'MODULE_PATHNAME','stop_session_trace_pid' 69 | LANGUAGE C STABLE STRICT; 70 | 71 | 72 | CREATE FUNCTION start_lockmanager_trace() 73 | RETURNS void 74 | AS 'MODULE_PATHNAME', 'start_lockmanager_trace' 75 | LANGUAGE C STABLE STRICT; 76 | 77 | CREATE FUNCTION stop_lockmanager_trace(IN should_write_stat boolean) 78 | RETURNS void 79 | AS 'MODULE_PATHNAME', 'stop_lockmanager_trace' 80 | LANGUAGE C STABLE STRICT; 81 | 82 | CREATE FUNCTION dump_lockmanager_stat(IN should_empty_stat boolean) 83 | RETURNS void 84 | AS 'MODULE_PATHNAME', 'dump_lockmanager_stat' 85 | LANGUAGE C STABLE STRICT; -------------------------------------------------------------------------------- /src/uprobe_factory.c: -------------------------------------------------------------------------------- 1 | #include "postgres.h" 2 | 3 | #include "count_uprobes.h" 4 | #include "lockmanager_trace.h" 5 | 6 | #include "uprobe_factory.h" 7 | 8 | #define UNVALIDTYPESTR "INVALID_TYPE" 9 | 10 | 11 | typedef struct UprobeFactoryEntry 12 | { 13 | StorageInitFunc storageInit; 14 | UprobeInterfaceInitFunc interfaceInit; 15 | UprobeAttachType type; 16 | char *stringtype; 17 | } UprobeFactoryEntry; 18 | 19 | 20 | UprobeFactoryEntry staticEntries[] = { 21 | { 22 | .interfaceInit = UprobeAttachTimeInit, 23 | .storageInit = UprobeStorageTimeInit, 24 | .type = TIME, 25 | .stringtype = "TIME" 26 | }, 27 | { 28 | .interfaceInit = UprobeAttachHistInit, 29 | .storageInit = UprobeStorageHistInit, 30 | .type = HIST, 31 | .stringtype = "HIST" 32 | }, 33 | { 34 | .interfaceInit = UprobeAttachMemInit, 35 | .storageInit = UprobeStorageMemInit, 36 | .type = MEM, 37 | .stringtype = "MEM" 38 | }, 39 | { 40 | .interfaceInit = LWLockAcquireInit, 41 | .storageInit = NullStorageInit, 42 | .type = LOCK_ACQUIRE, 43 | .stringtype = "LOCK_ACQUIRE" 44 | }, 45 | { 46 | .interfaceInit = LWLockReleaseInit, 47 | .storageInit = LockManagerStorageInit, 48 | .type = LOCK_RELEASE, 49 | .stringtype = "LOCK_RELEASE" 50 | } 51 | }; 
52 | 53 | #define STATIC_ENTRIES_SIZE sizeof(staticEntries) / sizeof(staticEntries[0]) 54 | 55 | 56 | void 57 | CreateUprobeAttachForType(const char *type, const char *symbol, UprobeAttach *uprobeAttach) 58 | { 59 | 60 | for (size_t i = 0; i < STATIC_ENTRIES_SIZE; i++) 61 | { 62 | if (strcmp(type, staticEntries[i].stringtype) == 0) 63 | { 64 | uprobeAttach->type = staticEntries[i].type; 65 | uprobeAttach->impl = staticEntries[i].interfaceInit(symbol); 66 | return; 67 | } 68 | } 69 | 70 | uprobeAttach->type = INVALID_TYPE; 71 | uprobeAttach->impl = NULL; 72 | } 73 | 74 | 75 | const char * 76 | GetCharNameForUprobeAttachType(UprobeAttachType type) 77 | { 78 | for (size_t i = 0; i < STATIC_ENTRIES_SIZE; i++) 79 | { 80 | if (type == staticEntries[i].type) 81 | return staticEntries[i].stringtype; 82 | } 83 | 84 | return UNVALIDTYPESTR; 85 | } 86 | 87 | 88 | UprobeStorage * 89 | GetUprobeStorageForType(UprobeAttachType type, const char *symbol) 90 | { 91 | 92 | for (size_t i = 0; i < STATIC_ENTRIES_SIZE; i++) 93 | { 94 | if (type == staticEntries[i].type) 95 | return staticEntries[i].storageInit(symbol); 96 | } 97 | 98 | return NULL; 99 | } 100 | 101 | 102 | UprobeAttachType 103 | GetTypeByCharName(const char *name) 104 | { 105 | for (size_t i = 0; i < STATIC_ENTRIES_SIZE; i++) 106 | { 107 | if (strcmp(name, staticEntries[i].stringtype) == 0) 108 | return staticEntries[i].type; 109 | } 110 | 111 | return INVALID_TYPE; 112 | } 113 | -------------------------------------------------------------------------------- /src/trace_file.c: -------------------------------------------------------------------------------- 1 | #include "postgres.h" 2 | #include 3 | #include "utils/guc.h" 4 | #include "miscadmin.h" 5 | 6 | #include "trace_file.h" 7 | 8 | static char *traceFileName = NULL; 9 | char *dataDir = NULL; 10 | 11 | /*limit in megabytes */ 12 | int traceFileLimit = 16; 13 | size_t currentSize = 0; 14 | FILE *traceFile = NULL; 15 | 16 | static bool CheckDataDirValue(char 
**newval, void **extra, GucSource source); 17 | 18 | 19 | void 20 | TraceFileDeclareGucVariables(void) 21 | { 22 | DefineCustomIntVariable("pg_uprobe.trace_file_limit", 23 | "trace session data limit in megabytes", 24 | NULL, 25 | &traceFileLimit, 26 | 16, 27 | 1, 28 | 32 * 1024, 29 | PGC_SUSET, 30 | GUC_UNIT_MB, 31 | NULL, 32 | NULL, 33 | NULL); 34 | 35 | 36 | DefineCustomStringVariable("pg_uprobe.trace_file_name", 37 | "file name for trace session data", 38 | NULL, 39 | &traceFileName, 40 | "trace_file.txt", 41 | PGC_SUSET, 42 | 0, 43 | NULL, NULL, NULL); 44 | 45 | DefineCustomStringVariable("pg_uprobe.data_dir", 46 | "dir for file pg_uprobe.trace_file_name and other pg_uprobe files", 47 | NULL, 48 | &dataDir, 49 | "./pg_uprobe/", 50 | PGC_SUSET, 51 | 0, 52 | CheckDataDirValue, NULL, NULL); 53 | } 54 | 55 | 56 | bool 57 | OpenTraceSessionFile(bool throwOnError) 58 | { 59 | char path[4096]; 60 | int error; 61 | 62 | sprintf(path, "%s/%s_%d", dataDir, traceFileName, MyProcPid); 63 | 64 | traceFile = fopen(path, "w"); 65 | error = errno; 66 | 67 | if (traceFile == NULL && throwOnError) 68 | elog(ERROR, "can't open file %s for trace session data: %s", path, strerror(error)); 69 | else if (traceFile == NULL) 70 | elog(LOG, "can't open file %s for trace session data: %s", path, strerror(error)); 71 | 72 | currentSize = 0; 73 | return traceFile != NULL; 74 | } 75 | 76 | 77 | void 78 | CloseTraceSessionFile(void) 79 | { 80 | if (traceFile == NULL) 81 | return; 82 | 83 | fclose(traceFile); 84 | traceFile = NULL; 85 | currentSize = 0; 86 | } 87 | 88 | 89 | static bool 90 | CheckDataDirValue(char **newval, void **extra, GucSource source) 91 | { 92 | struct stat st; 93 | size_t size; 94 | 95 | 96 | if (*newval == NULL || *newval[0] == '\0') 97 | { 98 | GUC_check_errdetail("pg_uprobe data can't be empty or null"); 99 | return false; 100 | } 101 | 102 | size = strlen(*newval); 103 | 104 | if (size + 128 >= MAXPGPATH) 105 | { 106 | GUC_check_errdetail("pg_uprobe data dir too 
long."); 107 | return false; 108 | } 109 | 110 | if (stat(*newval, &st) != 0 || !S_ISDIR(st.st_mode)) 111 | { 112 | GUC_check_errdetail("Specified pg_uprobe data dir does not exist."); 113 | return false; 114 | } 115 | 116 | return true; 117 | } 118 | -------------------------------------------------------------------------------- /gen_node_names_array.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import re 3 | 4 | 5 | out_file_beg = """/*------------------------------------------------------------------------- 6 | * 7 | * {0} 8 | * Generated node infrastructure code 9 | * 10 | * PgPro (C) 11 | * 12 | * NOTES 13 | * ****************************** 14 | * *** DO NOT EDIT THIS FILE! *** 15 | * ****************************** 16 | * 17 | * It has been GENERATED by pg_uporbe/gen_node_names_array.py 18 | * 19 | *------------------------------------------------------------------------- 20 | */ 21 | "Invalid",\n""" 22 | 23 | 24 | def parse_nodetags(in_file_name: str, out_file_name: str) -> None: 25 | nodes_file = open(in_file_name, "r") 26 | out_file = open(out_file_name, "w") 27 | out_file.write(out_file_beg.format(out_file_name)) 28 | 29 | file_lines = nodes_file.readlines() 30 | for i in range(len(file_lines)): 31 | line = file_lines[i] 32 | if line.startswith(("/*", " *")): #skip comments at the start 33 | continue 34 | 35 | # line format is: " T_NodeName = 1,/n" 36 | line_splited = line[line.find("_") + 1 :].split(" = ") 37 | node_name = line_splited[0] 38 | if (i != len(file_lines) - 1): 39 | out_file.write('"' + node_name + '",\n') 40 | else: 41 | out_file.write('"' + node_name + '"\n') 42 | nodes_file.close() 43 | out_file.close() 44 | 45 | 46 | def parse_nodes_enum(in_file_name: str, out_file_name: str) -> None: 47 | nodes_file = open(in_file_name, "r") 48 | out_file = open(out_file_name, "w") 49 | out_file.write(out_file_beg.format(out_file_name)) 50 | index_line_of_enum_start: int = -1 51 | 
index_line_of_enum_end: int = -1 52 | 53 | file_lines:list[str] = nodes_file.readlines() 54 | 55 | # finding start and end of enum NodeTag 56 | for i in range(len(file_lines)): 57 | if (file_lines[i] == "typedef enum NodeTag\n"): 58 | index_line_of_enum_start = i + 3 #skipping enum entry till end of "T_Invalid = 0," line 59 | 60 | if (index_line_of_enum_start != -1 and file_lines[i] == "} NodeTag;\n"): 61 | index_line_of_enum_end = i 62 | 63 | # working with enum entries 64 | for i in range(index_line_of_enum_start, index_line_of_enum_end): 65 | current_line = file_lines[i].strip().strip(" ") #removing spaces and tab symbols 66 | if (not current_line.startswith("T_")): 67 | continue 68 | current_line = current_line[2:] # we skip "T_" 69 | if (i != index_line_of_enum_end - 1): 70 | current_line = current_line.split(",")[0] 71 | else: 72 | current_line = re.match(r'^[a-zA-Z]+', current_line).group(0) # we take only letters before anything else 73 | 74 | if (i != index_line_of_enum_end - 1): 75 | out_file.write('"' + current_line + '",\n') 76 | else: 77 | out_file.write('"' + current_line + '"\n') 78 | 79 | nodes_file.close() 80 | out_file.close() 81 | 82 | 83 | def main(): 84 | args = sys.argv 85 | 86 | if len(args) != 4 : 87 | print("invalid number of args given\n need two arg: pg_major_version, path to nodes include dir, out file name") 88 | exit(1) 89 | 90 | if (int(args[1]) >= 16): 91 | parse_nodetags(args[2] + "/nodetags.h", args[3]) 92 | else: 93 | parse_nodes_enum(args[2] + "/nodes.h", args[3]) 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | if __name__ == "__main__" : 105 | main() 106 | -------------------------------------------------------------------------------- /doc/example_profile_func.md: -------------------------------------------------------------------------------- 1 | # Примеры профилирования функции 2 | 3 | Подразумевается, что расширение уже установлено и настроено. 
Как установить и настроить расширение можно посмотреть в основном файле [README](./../README.md#установка). 4 | 5 | Для начала нужно выбрать функцию, которую мы будем профилировать. Без исходного кода PostgreSQL не обойтись. Исходный код PostgreSQL расположен [тут](https://github.com/postgres/postgres). 6 | 7 | Мы будем профилировать функцию `PortalStart`. Функция `PortalStart` подготавливает портал (portal) для выполнения запроса. Портал в контексте баз данных PostgreSQL — это объект, который представляет собой подготовленное состояние для выполнения запроса. Эта функция выполняет необходимую инициализацию перед тем, как портал сможет быть использован для выполнения запроса через вызов `PortalRun`. 8 | 9 | Чтобы данные были показательными, давайте сгенерируем БД и будем подавать нагрузку с помощью [pgbench](https://postgrespro.ru/docs/postgresql/17/pgbench); если установлен PostgreSQL, он устанавливается автоматически. 10 | 11 | Генерация данных: 12 | ```console 13 | vadimlakt:~$ pgbench -i -s 100 14 | ``` 15 | 16 | ## Профилируем текущий сеанс 17 | 18 | В `psql` ставим пробу на функцию `PortalStart`: 19 | 20 | ```sql 21 | select set_uprobe('PortalStart', 'HIST', false); 22 | ``` 23 | 24 | Проверим, что проба установилась: 25 | 26 | ```sql 27 | select list_uprobes(); 28 | list_uprobes 29 | ---------------------- 30 | (PortalStart,HIST,f) 31 | (1 строка) 32 | 33 | ``` 34 | 35 | Генерируем нагрузку с помощью этого же терминала `psql`: 36 | ```sql 37 | select * from pgbench_accounts LIMIT 5; 38 | select * from pgbench_accounts LIMIT 5; 39 | select * from pgbench_accounts LIMIT 5; 40 | select * from pgbench_accounts LIMIT 5; 41 | 42 | ``` 43 | С помощью `stat_hist_uprobe` построим гистограмму: 44 | 45 | ```sql 46 | select * from stat_hist_uprobe('PortalStart'); 47 | time_range | hist_entry | percent 48 | --------------------+----------------------------------------------------+--------- 49 | (..., 12.6 us) | | 0.000 50 | (12.6 us, 17.1 us) | 
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | 66.666 51 | (17.1 us, 21.7 us) | @@@@@@@@ | 16.666 52 | (26.2 us, ...) | | 0.000 53 | (4 строки) 54 | 55 | ``` 56 | 57 | Удалим пробу, чтобы она не мешала нормальной работе системы: 58 | ```sql 59 | select delete_uprobe('PortalStart', false); 60 | ``` 61 | 62 | Проверим, что проба удалена: 63 | ```sql 64 | select list_uprobes(); 65 | list_uprobes 66 | -------------- 67 | (0 строк) 68 | ``` 69 | 70 | ## Профилируем все сеансы в системе 71 | 72 | В `psql` ставим пробу на функцию `PortalStart`: 73 | 74 | ```sql 75 | select set_uprobe('PortalStart', 'HIST', true); 76 | ``` 77 | Делаем нагрузку с помощью `pgbench`: 78 | ```console 79 | vadimlakt:~$ pgbench -t 100 -P 1 postgres 80 | pgbench (16.8) 81 | starting vacuum...end. 82 | transaction type: 83 | scaling factor: 100 84 | query mode: simple 85 | number of clients: 1 86 | number of threads: 1 87 | maximum number of tries: 1 88 | number of transactions per client: 100 89 | number of transactions actually processed: 100/100 90 | number of failed transactions: 0 (0.000%) 91 | latency average = 1.215 ms 92 | latency stddev = 0.211 ms 93 | initial connection time = 52.587 ms 94 | tps = 822.787935 (without initial connection time) 95 | ``` 96 | Удаляем пробу и сразу собираем статистику в `psql`: 97 | ```sql 98 | select delete_uprobe('PortalStart', true); 99 | ``` 100 | При корректном завершении в каталоге **pg_uprobe.data_dir** создается файл с собранной информацией. В нашем случае файл называется `HIST_PortalStart.txt`. 
Содержимое файла выглядит следующим образом: 101 | ``` 102 | time,count 103 | 0.2,48 104 | 0.3,217 105 | 0.4,286 106 | 0.5,31 107 | 0.6,13 108 | 0.7,5 109 | 0.8,1 110 | 1.0,1 111 | 1.6,1 112 | 5.4,4 113 | 5.5,10 114 | 5.6,25 115 | 5.7,20 116 | 5.8,5 117 | 5.9,6 118 | 6.0,8 119 | 6.1,1 120 | 6.2,1 121 | 6.7,1 122 | 6.8,2 123 | 6.9,1 124 | 7.2,1 125 | 7.4,1 126 | 7.5,1 127 | 8.1,2 128 | 8.2,1 129 | 8.3,2 130 | 8.5,1 131 | 9.3,1 132 | 10.0,1 133 | 10.1,1 134 | 10.7,1 135 | 10.9,1 136 | 11.0,1 137 | 11.1,1 138 | 118.9,1 139 | 71.8,1 140 | ``` 141 | Для других типов проб всё делается аналогично. -------------------------------------------------------------------------------- /src/list.c: -------------------------------------------------------------------------------- 1 | #include "postgres.h" 2 | 3 | #include "list.h" 4 | 5 | void 6 | ListInit(UprobeList **list, CompareFunction comparator, MemoryContext memoryContext) 7 | { 8 | MemoryContext old = MemoryContextSwitchTo(memoryContext); 9 | 10 | *list = (UprobeList *) palloc0(sizeof(UprobeList)); 11 | MemoryContextSwitchTo(old); 12 | (*list)->comparator = comparator; 13 | (*list)->memoryContext = memoryContext; 14 | return; 15 | } 16 | 17 | void 18 | ListAdd(UprobeList *list, void *value) 19 | { 20 | MemoryContext old = MemoryContextSwitchTo(list->memoryContext); 21 | ListItem *item = (ListItem *) palloc0(sizeof(ListItem)); 22 | 23 | MemoryContextSwitchTo(old); 24 | item->value = value; 25 | 26 | if (list->head == NULL) 27 | { 28 | list->head = item; 29 | list->tail = list->head; 30 | ++list->ListSize; 31 | return; 32 | } 33 | 34 | item->prev = list->tail; 35 | list->tail->next = item; 36 | list->tail = item; 37 | ++list->ListSize; 38 | return; 39 | } 40 | 41 | void * 42 | ListPop(UprobeList *list, void *value) 43 | { 44 | ListItem *current; 45 | ListItem *prev; 46 | 47 | if (list == NULL || list->head == NULL) 48 | return NULL; 49 | if (!list->comparator(list->head->value, value)) 50 | { 51 | ListItem *next = 
list->head->next; 52 | void *res = list->head->value; 53 | 54 | pfree(list->head); 55 | --list->ListSize; 56 | list->head = next; 57 | if (next) 58 | next->prev = NULL; 59 | else 60 | list->tail = NULL; 61 | return res; 62 | } 63 | 64 | current = list->head->next; 65 | prev = list->head; 66 | 67 | while (current != NULL) 68 | { 69 | if (!list->comparator(current->value, value)) 70 | { 71 | ListItem *temp = current->next; 72 | void *res = current->value; 73 | 74 | pfree(current); 75 | current = temp; 76 | prev->next = current; 77 | --list->ListSize; 78 | if (current == NULL) 79 | list->tail = prev; 80 | else 81 | current->prev = prev; 82 | return res; 83 | } 84 | prev = current; 85 | current = current->next; 86 | } 87 | return NULL; 88 | } 89 | 90 | 91 | void * 92 | ListPopLast(UprobeList *list) 93 | { 94 | void *res; 95 | 96 | if (list == NULL || list->tail == NULL) 97 | return NULL; 98 | 99 | res = list->tail->value; 100 | --list->ListSize; 101 | if (list->ListSize == 0) 102 | { 103 | pfree(list->tail); 104 | list->head = NULL; 105 | list->tail = NULL; 106 | } 107 | else 108 | { 109 | ListItem *saveTail = list->tail; 110 | 111 | list->tail->prev->next = NULL; 112 | list->tail = list->tail->prev; 113 | pfree(saveTail); 114 | } 115 | return res; 116 | } 117 | 118 | 119 | void * 120 | ListPopFirst(UprobeList *list) 121 | { 122 | void *res; 123 | 124 | if (list == NULL || list->head == NULL) 125 | return NULL; 126 | 127 | res = list->head->value; 128 | --list->ListSize; 129 | if (list->ListSize == 0) 130 | { 131 | pfree(list->head); 132 | list->head = NULL; 133 | list->tail = NULL; 134 | } 135 | else 136 | { 137 | ListItem *saveHead = list->head; 138 | 139 | list->head->next->prev = NULL; 140 | list->head = list->head->next; 141 | pfree(saveHead); 142 | } 143 | return res; 144 | } 145 | 146 | 147 | size_t 148 | ListSize(UprobeList *list) 149 | { 150 | return list->ListSize; 151 | } 152 | 153 | 154 | bool 155 | ListContains(UprobeList *list, void *value) 156 | { 157 | 
for (ListItem *item = list->head; item != NULL; item = item->next) 158 | { 159 | if (!list->comparator(item->value, value)) 160 | return true; 161 | } 162 | return false; 163 | } 164 | 165 | 166 | void 167 | ListFree(UprobeList *list) 168 | { 169 | ListItem *list_iterator = list->head; 170 | 171 | while (list_iterator != NULL) 172 | { 173 | ListItem *next = list_iterator->next; 174 | 175 | pfree(list_iterator); 176 | list_iterator = next; 177 | } 178 | pfree(list); 179 | } 180 | 181 | 182 | void 183 | ListMakeEmpty(UprobeList *list) 184 | { 185 | ListItem *list_iterator = list->head; 186 | 187 | while (list_iterator != NULL) 188 | { 189 | ListItem *next = list_iterator->next; 190 | 191 | pfree(list_iterator); 192 | list_iterator = next; 193 | } 194 | list->ListSize = 0; 195 | list->head = NULL; 196 | list->tail = NULL; 197 | 198 | } 199 | 200 | 201 | void * 202 | ListFind(UprobeList *list, void *value) 203 | { 204 | LIST_FOREACH(list, it) 205 | { 206 | if (list->comparator(it->value, value) == 0) 207 | return it->value; 208 | } 209 | return NULL; 210 | } 211 | -------------------------------------------------------------------------------- /frida.cmake: -------------------------------------------------------------------------------- 1 | # add frida as external project 2 | set(FRIDA_DOWNLOAD_LOCATION ${CMAKE_CURRENT_SOURCE_DIR}/third_party/frida) 3 | 4 | set(FRIDA_DOWNLOAD_URL_PREFIX "" CACHE STRING "The prefix added to the frida download url. 
For example, https://ghproxy.com/") 5 | 6 | message(STATUS "System Name: ${CMAKE_SYSTEM_NAME}") 7 | message(STATUS "System Version: ${CMAKE_SYSTEM_VERSION}") 8 | message(STATUS "System Processor: ${CMAKE_SYSTEM_PROCESSOR}") 9 | 10 | set(FRIDA_OS_ARCH_RAW "${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") 11 | string(TOLOWER ${FRIDA_OS_ARCH_RAW} FRIDA_OS_ARCH) 12 | set(FRIDA_VERSION "17.1.5") 13 | 14 | message(STATUS "Using frida: arch=${FRIDA_OS_ARCH}, version=${FRIDA_VERSION}") 15 | 16 | if(${FRIDA_OS_ARCH} STREQUAL "linux-x86_64") 17 | set(FRIDA_CORE_DEVKIT_SHA256 "14f533c7aa45e3d9ef9a711833a436d17bf20251c5579ee498ffb907b2a0e127") 18 | set(FRIDA_GUM_DEVKIT_SHA256 "46bdd9a463b36127a9f5d2a9c770aa738d392c723f213436ec939096125a7a09") 19 | elseif(${FRIDA_OS_ARCH} STREQUAL "linux-aarch64") 20 | set(FRIDA_CORE_DEVKIT_SHA256 "df8b4ad168e21398548a407bc6fbd68121ef1cc92c455a11edbc85c423101cfe") 21 | set(FRIDA_GUM_DEVKIT_SHA256 "3aea84ef12537415e511971c7b04d89f7d539c669d0e730c0662779c75667261") 22 | # Cmake uses aarch64, but frida uses arm64 23 | set(FRIDA_OS_ARCH "linux-arm64") 24 | elseif(${FRIDA_OS_ARCH} MATCHES "linux-arm.*") 25 | set(FRIDA_CORE_DEVKIT_SHA256 "b0113039b83a2542a1dca122cc29bc95e62ceced452d351a0e9b5452d5ade9e9") 26 | set(FRIDA_GUM_DEVKIT_SHA256 "6faa9cbd76a06c40f43f40c5d4bf3138566e40fe0438c616e1b7f0b6ccf4f0dc") 27 | # Frida only has armhf builds.. 
28 | set(FRIDA_OS_ARCH "linux-armhf") 29 | elseif(${FRIDA_OS_ARCH} MATCHES "darwin-arm.*") 30 | set(FRIDA_CORE_DEVKIT_SHA256 "674e1deb0a2ce28456755bdfa00fb4b866f651afff84bb3e0eb349f52ec8b90b") 31 | set(FRIDA_GUM_DEVKIT_SHA256 "1d148cbcf1ac32611417beef728864bcdb8b81b7479830b187f3981a4289d640") 32 | # for macos-arm m* chip series 33 | set(FRIDA_OS_ARCH "macos-arm64e") 34 | elseif(${FRIDA_OS_ARCH} STREQUAL "freebsd-amd64") 35 | set(FRIDA_CORE_DEVKIT_SHA256 "8b85853d5cde9cdd3eb992bc74e710f236a5a329d90b0f2caa55556351c7113a") 36 | set(FRIDA_GUM_DEVKIT_SHA256 "e3a3a74818aab8d268bf4eb4ddadde6ed4e720985b5e7ade53e8c9c8f6b34e5d") 37 | # Cmake uses freebsd-amd64 but frida has freebsd-x86_64 38 | set(FRIDA_OS_ARCH "freebsd-x86_64") 39 | else() 40 | message(FATAL_ERROR "Unsupported frida arch ${FRIDA_OS_ARCH}") 41 | endif() 42 | 43 | set(FRIDA_CORE_FILE_NAME "frida-core-devkit-${FRIDA_VERSION}-${FRIDA_OS_ARCH}.tar.xz") 44 | set(FRIDA_GUM_FILE_NAME "frida-gum-devkit-${FRIDA_VERSION}-${FRIDA_OS_ARCH}.tar.xz") 45 | set(FRIDA_CORE_DEVKIT_URL "${FRIDA_DOWNLOAD_URL_PREFIX}https://github.com/frida/frida/releases/download/${FRIDA_VERSION}/${FRIDA_CORE_FILE_NAME}") 46 | set(FRIDA_GUM_DEVKIT_URL "${FRIDA_DOWNLOAD_URL_PREFIX}https://github.com/frida/frida/releases/download/${FRIDA_VERSION}/${FRIDA_GUM_FILE_NAME}") 47 | 48 | set(FRIDA_CORE_DEVKIT_PATH ${FRIDA_DOWNLOAD_LOCATION}/${FRIDA_CORE_FILE_NAME}) 49 | set(FRIDA_GUM_DEVKIT_PATH ${FRIDA_DOWNLOAD_LOCATION}/${FRIDA_GUM_FILE_NAME}) 50 | 51 | set(FRIDA_CORE_INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/FridaCore-prefix/src/FridaCore) 52 | set(FRIDA_GUM_INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/FridaGum-prefix/src/FridaGum) 53 | 54 | # if file exists, skip download 55 | if(NOT EXISTS ${FRIDA_CORE_DEVKIT_PATH}) 56 | message(STATUS "Downloading Frida Core Devkit") 57 | set(FRIDA_CORE_DOWNLOAD_URL ${FRIDA_CORE_DEVKIT_URL}) 58 | else() 59 | message(STATUS "Frida Core Devkit already downloaded") 60 | set(FRIDA_CORE_DOWNLOAD_URL ${FRIDA_CORE_DEVKIT_PATH}) 
61 | endif() 62 | 63 | # if file exists, skip download 64 | if(NOT EXISTS ${FRIDA_GUM_DEVKIT_PATH}) 65 | message(STATUS "Downloading Frida GUM Devkit") 66 | set(FRIDA_GUM_DOWNLOAD_URL ${FRIDA_GUM_DEVKIT_URL}) 67 | else() 68 | message(STATUS "Frida GUM Devkit already downloaded") 69 | set(FRIDA_GUM_DOWNLOAD_URL ${FRIDA_GUM_DEVKIT_PATH}) 70 | endif() 71 | 72 | message(STATUS "Downloading FridaCore from ${FRIDA_CORE_DOWNLOAD_URL}") 73 | include(ExternalProject) 74 | ExternalProject_Add(FridaCore 75 | URL ${FRIDA_CORE_DOWNLOAD_URL} 76 | DOWNLOAD_DIR ${FRIDA_DOWNLOAD_LOCATION} 77 | CONFIGURE_COMMAND "" 78 | BUILD_COMMAND "" 79 | INSTALL_COMMAND "" 80 | BUILD_BYPRODUCTS ${FRIDA_CORE_INSTALL_DIR}/libfrida-core.a 81 | URL_HASH SHA256=${FRIDA_CORE_DEVKIT_SHA256} 82 | ) 83 | 84 | message(STATUS "Downloading FridaGum from ${FRIDA_GUM_DOWNLOAD_URL}") 85 | ExternalProject_Add(FridaGum 86 | URL ${FRIDA_GUM_DOWNLOAD_URL} 87 | DOWNLOAD_DIR ${FRIDA_DOWNLOAD_LOCATION} 88 | CONFIGURE_COMMAND "" 89 | BUILD_COMMAND "" 90 | INSTALL_COMMAND "" 91 | BUILD_BYPRODUCTS ${FRIDA_GUM_INSTALL_DIR}/libfrida-gum.a 92 | URL_HASH SHA256=${FRIDA_GUM_DEVKIT_SHA256} 93 | ) 94 | -------------------------------------------------------------------------------- /src/trace_planning.c: -------------------------------------------------------------------------------- 1 | #include "postgres.h" 2 | #include 3 | #include "utils/plancache.h" 4 | 5 | #include "trace_lock_on_buffers.h" 6 | #include "trace_session.h" 7 | #include "trace_file.h" 8 | 9 | #include "trace_planning.h" 10 | 11 | typedef struct PlannerTrace 12 | { 13 | uint64 startTime; 14 | 15 | /* using 0 as invalid val */ 16 | uint64 planningTime; 17 | HTAB *bufferLocksStat; 18 | char *boundParamsLogString; 19 | bool isPlanCustom; 20 | } PlannerTrace; 21 | 22 | 23 | typedef struct GetCachedPlanTrace 24 | { 25 | CachedPlanSource *plansource; 26 | ParamListInfo boundParams; 27 | int64 prevNumCustomPlans; 28 | int64 prevNumGenericPlans; 29 | } 
GetCachedPlanTrace; 30 | 31 | 32 | static PlannerTrace plannerTrace = 33 | { 34 | .startTime = 0, 35 | .planningTime = 0, 36 | .boundParamsLogString = NULL, 37 | .bufferLocksStat = NULL, 38 | .isPlanCustom = false 39 | }; 40 | 41 | static GetCachedPlanTrace getCachedPlanTrace = 42 | { 43 | .plansource = NULL, 44 | .prevNumCustomPlans = 0, 45 | .prevNumGenericPlans = 0 46 | }; 47 | 48 | static MemoryContext traceMemoryContext = NULL; 49 | 50 | static void PlannerTraceInFunc(void *data); 51 | static void PlannerTraceRetFunc(void *data); 52 | static void PlannerTraceCleanFunc(UprobeAttachInterface *uprobe); 53 | static void GetCachedPlanInFunc(void *data, CachedPlanSource *plansource, ParamListInfo boundParams); 54 | static void GetCachedPlanRetFunc(void *data); 55 | static void GetCachedPlanCleanFunc(UprobeAttachInterface *uprobe); 56 | 57 | static void 58 | PlannerTraceInFunc(void *data) 59 | { 60 | struct timespec time; 61 | 62 | LockOnBuffersTraceStatPush(); 63 | 64 | clock_gettime(CLOCK_MONOTONIC, &time); 65 | 66 | plannerTrace.startTime = time.tv_sec * 1000000000L + time.tv_nsec; 67 | } 68 | 69 | 70 | static void 71 | PlannerTraceRetFunc(void *data) 72 | { 73 | struct timespec time; 74 | 75 | clock_gettime(CLOCK_MONOTONIC, &time); 76 | 77 | plannerTrace.planningTime = time.tv_sec * 1000000000L + time.tv_nsec - plannerTrace.startTime; 78 | 79 | if (plannerTrace.bufferLocksStat) 80 | hash_destroy(plannerTrace.bufferLocksStat); 81 | 82 | plannerTrace.bufferLocksStat = LockOnBuffersTraceStatPopAndGet(); 83 | } 84 | 85 | 86 | static void 87 | PlannerTraceCleanFunc(UprobeAttachInterface *uprobe) 88 | { 89 | pfree(uprobe); 90 | } 91 | 92 | 93 | static void 94 | GetCachedPlanInFunc(void *data, CachedPlanSource *plansource, ParamListInfo boundParams) 95 | { 96 | getCachedPlanTrace.plansource = plansource; 97 | getCachedPlanTrace.prevNumCustomPlans = plansource->num_custom_plans; 98 | getCachedPlanTrace.prevNumGenericPlans = plansource->num_generic_plans; 99 | 
getCachedPlanTrace.boundParams = boundParams; 100 | } 101 | 102 | 103 | static void 104 | GetCachedPlanRetFunc(void *data) 105 | { 106 | MemoryContext old; 107 | 108 | old = MemoryContextSwitchTo(traceMemoryContext); 109 | 110 | if (plannerTrace.boundParamsLogString) 111 | pfree(plannerTrace.boundParamsLogString); 112 | 113 | if (getCachedPlanTrace.boundParams) 114 | plannerTrace.boundParamsLogString = BuildParamLogString(getCachedPlanTrace.boundParams, NULL, -1); 115 | else 116 | plannerTrace.boundParamsLogString = pstrdup(""); 117 | 118 | if (getCachedPlanTrace.plansource->num_custom_plans != getCachedPlanTrace.prevNumCustomPlans) 119 | { 120 | plannerTrace.isPlanCustom = true; 121 | } 122 | else if (getCachedPlanTrace.plansource->num_generic_plans != getCachedPlanTrace.prevNumGenericPlans) 123 | { 124 | plannerTrace.isPlanCustom = false; 125 | } 126 | MemoryContextSwitchTo(old); 127 | } 128 | 129 | 130 | static void 131 | GetCachedPlanCleanFunc(UprobeAttachInterface *uprobe) 132 | { 133 | pfree(uprobe); 134 | } 135 | 136 | /* return 2 Uprobes to attach in resUrpobes array */ 137 | void 138 | PlanningUprobesGet(UprobeAttachInterface **resUprobes, MemoryContext context) 139 | { 140 | traceMemoryContext = context; 141 | 142 | resUprobes[0] = (UprobeAttachInterface *) palloc0(sizeof(UprobeAttachInterface)); 143 | resUprobes[0]->cleanFunc = PlannerTraceCleanFunc; 144 | resUprobes[0]->inFunc = PlannerTraceInFunc; 145 | resUprobes[0]->retFunc = PlannerTraceRetFunc; 146 | resUprobes[0]->targetSymbol = "planner"; 147 | 148 | resUprobes[1] = (UprobeAttachInterface *) palloc0(sizeof(UprobeAttachInterface)); 149 | resUprobes[1]->cleanFunc = GetCachedPlanCleanFunc; 150 | resUprobes[1]->inFunc = GetCachedPlanInFunc; 151 | resUprobes[1]->retFunc = GetCachedPlanRetFunc; 152 | resUprobes[1]->numArgs = 2; 153 | resUprobes[1]->targetSymbol = "GetCachedPlan"; 154 | } 155 | 156 | 157 | void 158 | PlanningWriteData(void) 159 | { 160 | if (plannerTrace.planningTime == 0) 161 | return; 
162 | 163 | if (writeMode == TEXT_WRITE_MODE) 164 | TracePrintf("TRACE PLAN. planningTime %lu nanosec\n", plannerTrace.planningTime); 165 | else 166 | TracePrintf("\"planningTime\": \"%lu nanosec\",\n", plannerTrace.planningTime); 167 | 168 | LockOnBuffersTraceWriteStatWithName(plannerTrace.bufferLocksStat, "LWLockPlanning"); 169 | 170 | if (plannerTrace.boundParamsLogString == NULL) 171 | { 172 | PlanningClearData(); 173 | return; 174 | } 175 | 176 | if (writeMode == TEXT_WRITE_MODE) 177 | TracePrintf("TRACE GET_CACHED_PLAN. Custom plan was chosen for boundParams: %s", plannerTrace.boundParamsLogString); 178 | else 179 | TracePrintf(" \"planType\": \"%s\",\n \"params\": \"%s\",\n", 180 | plannerTrace.isPlanCustom ? "custom" : "generic", plannerTrace.boundParamsLogString); 181 | 182 | 183 | PlanningClearData(); 184 | } 185 | 186 | 187 | void 188 | PlanningClearData(void) 189 | { 190 | hash_destroy(plannerTrace.bufferLocksStat); 191 | if (plannerTrace.boundParamsLogString) 192 | { 193 | pfree(plannerTrace.boundParamsLogString); 194 | plannerTrace.boundParamsLogString = NULL; 195 | } 196 | plannerTrace.bufferLocksStat = NULL; 197 | plannerTrace.planningTime = 0; 198 | } 199 | -------------------------------------------------------------------------------- /src/uprobe_collector.c: -------------------------------------------------------------------------------- 1 | #include "postgres.h" 2 | #include 3 | #include "utils/elog.h" 4 | #include "utils/memutils.h" 5 | #include "postmaster/bgworker.h" 6 | #include "storage/ipc.h" 7 | 8 | #include "list.h" 9 | #include "uprobe_shared_config.h" 10 | #include "uprobe_factory.h" 11 | #include "uprobe_message_buffer.h" 12 | 13 | 14 | static UprobeList *storageList = NULL; 15 | 16 | static char currentSymbol[MAX_SYMBOL_SIZE + 1] = {'\0'}; 17 | 18 | static UprobeStorage *currentStorage = NULL; 19 | 20 | static MemoryContext collectorContext = NULL; 21 | 22 | 23 | PGDLLEXPORT void StatCollectorMain(void); 24 | 25 | static int 
StorageListComparator(UprobeStorage *entry, char *symbol); 26 | static void ProcessMessageNewSharedUprobe(MessageNewSharedUprobe *mes); 27 | static void ProcessMessageWriteStat(MessageWriteStat *mes); 28 | static void ProcessMessageDeleteSharedUprobe(MessageDeleteSharedUprobe *mes); 29 | static void ProcessMessageSymbol(MessageSymbol *mes); 30 | static void ProcessMessageCustom(Message *mes); 31 | static void ProcessOneMessage(Message *mes); 32 | 33 | static void TermSignalHandler(SIGNAL_ARGS); 34 | static void StatCollectorAtExitCallback(void); 35 | static void StatCollectorSharedConfigApplyFunc(const char *func, const char *type); 36 | 37 | static int 38 | StorageListComparator(UprobeStorage *entry, char *symbol) 39 | { 40 | return strcmp(entry->symbol, symbol); 41 | } 42 | 43 | 44 | static void 45 | ProcessMessageNewSharedUprobe(MessageNewSharedUprobe *mes) 46 | { 47 | UprobeStorage *newEntry; 48 | 49 | if (ListContains(storageList, (void *) currentSymbol)) 50 | return; 51 | 52 | newEntry = GetUprobeStorageForType(mes->uprobeType, currentSymbol); 53 | 54 | if (!newEntry) 55 | return; 56 | 57 | ListAdd(storageList, newEntry); 58 | } 59 | 60 | 61 | static void 62 | ProcessMessageWriteStat(MessageWriteStat *mes) 63 | { 64 | if (currentStorage) 65 | currentStorage->writeStat(currentStorage, mes->shouldEmptyData); 66 | 67 | if (mes->latch) 68 | SetLatch(mes->latch); 69 | } 70 | 71 | 72 | static void 73 | ProcessMessageDeleteSharedUprobe(MessageDeleteSharedUprobe *mes) 74 | { 75 | UprobeStorage *entryToDelete = ListPop(storageList, currentSymbol); 76 | 77 | if (entryToDelete == NULL) 78 | { 79 | if (mes->latch && mes->shouldWriteStat) 80 | SetLatch(mes->latch); 81 | return; 82 | } 83 | entryToDelete->delete(entryToDelete, mes->shouldWriteStat); 84 | if (mes->latch && mes->shouldWriteStat) 85 | SetLatch(mes->latch); 86 | } 87 | 88 | 89 | static void 90 | ProcessMessageSymbol(MessageSymbol *mes) 91 | { 92 | uint16 symbolLen = mes->base.size - sizeof(MessageSymbol); 93 | 
94 | memcpy(currentSymbol, mes->symbol, symbolLen); 95 | 96 | currentSymbol[symbolLen] = '\0'; 97 | 98 | currentStorage = NULL; 99 | 100 | LIST_FOREACH(storageList, it) 101 | { 102 | UprobeStorage *storage = (UprobeStorage *) it->value; 103 | 104 | if (strcmp(storage->symbol, currentSymbol) == 0) 105 | { 106 | currentStorage = storage; 107 | break; 108 | } 109 | } 110 | } 111 | 112 | 113 | static void 114 | ProcessMessageCustom(Message *mes) 115 | { 116 | if (currentStorage) 117 | currentStorage->putData(currentStorage, mes); 118 | } 119 | 120 | 121 | static void 122 | ProcessOneMessage(Message *mes) 123 | { 124 | switch (mes->type) 125 | { 126 | case MESSAGE_NEW_SHARED_UPROBE: 127 | { 128 | ProcessMessageNewSharedUprobe((MessageNewSharedUprobe *) mes); 129 | break; 130 | } 131 | case MESSAGE_WRITE_STAT: 132 | { 133 | ProcessMessageWriteStat((MessageWriteStat *) mes); 134 | break; 135 | } 136 | case MESSAGE_DELETE_SHARED_UPROBE: 137 | { 138 | ProcessMessageDeleteSharedUprobe((MessageDeleteSharedUprobe *) mes); 139 | break; 140 | } 141 | case MESSAGE_SYMBOL: 142 | { 143 | ProcessMessageSymbol((MessageSymbol *) mes); 144 | break; 145 | } 146 | case MESSAGE_CUSTOM: 147 | { 148 | ProcessMessageCustom(mes); 149 | break; 150 | } 151 | default: 152 | elog(LOG, "invalid message type %d for symbol %128s", mes->type, currentSymbol); 153 | } 154 | } 155 | 156 | 157 | static void 158 | TermSignalHandler(SIGNAL_ARGS) 159 | { 160 | MessageBufferDelete(); 161 | proc_exit(0); 162 | } 163 | 164 | 165 | static void 166 | StatCollectorAtExitCallback(void) 167 | { 168 | LIST_FOREACH(storageList, it) 169 | { 170 | UprobeStorage *storage = (UprobeStorage *) it->value; 171 | 172 | storage->writeStat(storage, false); 173 | } 174 | } 175 | 176 | 177 | static void 178 | StatCollectorSharedConfigApplyFunc(const char *func, const char *type) 179 | { 180 | MessageNewSharedUprobe mes; 181 | 182 | mes.uprobeType = GetTypeByCharName(type); 183 | 184 | if (mes.uprobeType == INVALID_TYPE) 185 | { 
186 | elog(WARNING, "invalid attach type %128s", type); 187 | return; 188 | } 189 | 190 | strncpy(currentSymbol, func, 255); 191 | 192 | ProcessMessageNewSharedUprobe(&mes); 193 | } 194 | 195 | 196 | void 197 | StatCollectorMain(void) 198 | { 199 | Message *messages; 200 | uint32 currentNumberOfMessages; 201 | 202 | pqsignal(SIGTERM, TermSignalHandler); 203 | pqsignal(SIGHUP, SIG_IGN); 204 | pqsignal(SIGUSR1, SIG_IGN); 205 | BackgroundWorkerUnblockSignals(); 206 | 207 | collectorContext = AllocSetContextCreate(NULL, "pg_uprobe collector", ALLOCSET_DEFAULT_SIZES); 208 | MemoryContextSwitchTo(collectorContext); 209 | 210 | 211 | ListInit(&storageList, (CompareFunction) &StorageListComparator, collectorContext); 212 | 213 | PGUprobeLoadFromSharedConfig(&StatCollectorSharedConfigApplyFunc); 214 | before_shmem_exit((pg_on_exit_callback) StatCollectorAtExitCallback, (Datum) 0); 215 | messages = palloc(MESSAGEBUFFER_SIZE); 216 | while (true) 217 | { 218 | Message *currentMessage; 219 | 220 | currentNumberOfMessages = MessageBufferGet(messages, MESSAGEBUFFER_SIZE); 221 | 222 | currentMessage = messages; 223 | for (uint32 i = 0; i < currentNumberOfMessages; i++) 224 | { 225 | ProcessOneMessage(currentMessage); 226 | 227 | currentMessage = (Message *) (((char *) currentMessage) + currentMessage->size); 228 | } 229 | } 230 | } 231 | -------------------------------------------------------------------------------- /src/uprobe_shared_config.c: -------------------------------------------------------------------------------- 1 | #include "postgres.h" 2 | #include "fmgr.h" 3 | #include "utils/builtins.h" 4 | #include "utils/elog.h" 5 | #include "utils/varlena.h" 6 | #include "utils/jsonb.h" 7 | 8 | #include "uprobe_shared_config.h" 9 | 10 | #define CONFIG_PATH "./pg_uprobe/pg_uprobe_conf.jsonb" 11 | 12 | 13 | static void WriteJsonbToConfigFile(Jsonb *j); 14 | static char *ReadConfigToString(long *resultSize); 15 | 16 | static void 17 | WriteJsonbToConfigFile(Jsonb *j) 18 | { 19 | 
FILE *config = fopen(CONFIG_PATH, "w"); 20 | uint32 size = VARSIZE_4B(j); 21 | 22 | if (config == NULL) 23 | { 24 | elog(ERROR, "can't open config base/" CONFIG_PATH " file for writing"); 25 | } 26 | if (fwrite(j, 1, size, config) != size) 27 | { 28 | elog(ERROR, "can't write full config base/" CONFIG_PATH); 29 | } 30 | fclose(config); 31 | } 32 | 33 | static char * 34 | ReadConfigToString(long *resultSize) 35 | { 36 | long fsize; 37 | char *string; 38 | FILE *f = fopen(CONFIG_PATH, "rb"); 39 | 40 | if (f == NULL) 41 | return NULL; 42 | 43 | fseek(f, 0, SEEK_END); 44 | fsize = ftell(f); 45 | fseek(f, 0, SEEK_SET); 46 | 47 | string = palloc(fsize); 48 | if (fread(string, fsize, 1, f) != 1) 49 | { 50 | pfree(string); 51 | return NULL; 52 | } 53 | 54 | fclose(f); 55 | if (resultSize) 56 | *resultSize = fsize; 57 | 58 | return string; 59 | } 60 | 61 | /* should be called only in transactions */ 62 | void 63 | PGUprobeSaveInSharedConfig(char *func, char *type) 64 | { 65 | 66 | text *config; 67 | JsonbParseState *parse_state = NULL; 68 | JsonbValue json_value_config; 69 | JsonbPair pair; 70 | Jsonb *result_config; 71 | JsonbIteratorToken iter_token; 72 | JsonbIterator *json_iterator; 73 | JsonbValue v; 74 | 75 | PG_TRY(); 76 | { 77 | config = DatumGetTextP(DirectFunctionCall1(pg_read_binary_file_all, CStringGetTextDatum(CONFIG_PATH))); 78 | } 79 | PG_CATCH(); 80 | { 81 | elog(LOG, "can't read uprobe config will create new"); 82 | config = palloc(sizeof(text)); 83 | SET_VARSIZE_4B(config, VARHDRSZ); 84 | } 85 | PG_END_TRY(); 86 | 87 | /* : */ 88 | pair.key.type = jbvString; 89 | pair.key.val.string.len = (int) strlen(func) + 1; 90 | pair.key.val.string.val = func; 91 | pair.value.type = jbvString; 92 | pair.value.val.string.len = (int) strlen(type) + 1; 93 | pair.value.val.string.val = type; 94 | if (VARSIZE_ANY_EXHDR(config) != 0) 95 | { 96 | JsonbToJsonbValue((Jsonb *) VARDATA_ANY(config), &json_value_config); 97 | json_iterator = 
JsonbIteratorInit(json_value_config.val.binary.data); 98 | iter_token = JsonbIteratorNext(&json_iterator, &v, false); 99 | Assert(iter_token == WJB_BEGIN_OBJECT); 100 | 101 | while (iter_token != WJB_END_OBJECT) 102 | { 103 | 104 | pushJsonbValue(&parse_state, iter_token, 105 | iter_token < WJB_BEGIN_ARRAY || 106 | (iter_token == WJB_BEGIN_ARRAY && 107 | v.val.array.rawScalar) ? &v : NULL); 108 | iter_token = JsonbIteratorNext(&json_iterator, &v, false); 109 | } 110 | /* we do it to end iteration correctly */ 111 | iter_token = JsonbIteratorNext(&json_iterator, &v, false); 112 | Assert(iter_token == WJB_DONE); 113 | } 114 | else 115 | { 116 | pushJsonbValue(&parse_state, WJB_BEGIN_OBJECT, NULL); 117 | } 118 | 119 | 120 | pushJsonbValue(&parse_state, WJB_KEY, &pair.key); 121 | pushJsonbValue(&parse_state, WJB_VALUE, &pair.value); 122 | result_config = JsonbValueToJsonb(pushJsonbValue(&parse_state, WJB_END_OBJECT, NULL)); 123 | 124 | WriteJsonbToConfigFile(result_config); 125 | pfree(result_config); 126 | } 127 | 128 | 129 | void 130 | PGUprobeLoadFromSharedConfig(LoadFromConfigApplyFunc applyFunc) 131 | { 132 | char *config = NULL; 133 | long configSize; 134 | JsonbValue json_value_config; 135 | JsonbIteratorToken iter_token; 136 | JsonbValue k; 137 | JsonbValue v; 138 | JsonbIterator *json_iterator; 139 | 140 | config = ReadConfigToString(&configSize); 141 | 142 | if (configSize == 0 || config == NULL) 143 | { 144 | elog(LOG, "can't load from empty config"); 145 | return; 146 | } 147 | 148 | JsonbToJsonbValue((Jsonb *) config, &json_value_config); 149 | json_iterator = JsonbIteratorInit(json_value_config.val.binary.data); 150 | iter_token = JsonbIteratorNext(&json_iterator, &v, false); 151 | 152 | Assert(iter_token == WJB_BEGIN_OBJECT); 153 | while (iter_token != WJB_END_OBJECT) 154 | { 155 | iter_token = JsonbIteratorNext(&json_iterator, &k, false); 156 | if (iter_token == WJB_END_OBJECT) 157 | break; 158 | 159 | Assert(iter_token == WJB_KEY); 160 | Assert(k.type 
== jbvString); 161 | 162 | iter_token = JsonbIteratorNext(&json_iterator, &v, false); 163 | Assert(iter_token == WJB_VALUE); 164 | Assert(v.type == jbvString); 165 | 166 | applyFunc(k.val.string.val, v.val.string.val); 167 | } 168 | /* we do it to end iteration correctly */ 169 | iter_token = JsonbIteratorNext(&json_iterator, &v, false); 170 | Assert(iter_token == WJB_DONE); 171 | pfree(config); 172 | } 173 | 174 | 175 | void 176 | PGUprobeDeleteFromSharedConfig(const char *func) 177 | { 178 | char *config = NULL; 179 | long configSize; 180 | JsonbParseState *parse_state = NULL; 181 | JsonbValue json_value_config; 182 | JsonbIteratorToken iter_token; 183 | JsonbValue v; 184 | JsonbIterator *json_iterator; 185 | bool skip_next_value = false; 186 | Jsonb *result_config; 187 | 188 | config = ReadConfigToString(&configSize); 189 | 190 | if (configSize == 0 || config == NULL) 191 | { 192 | elog(LOG, "can't delete from empty config"); 193 | return; 194 | } 195 | 196 | JsonbToJsonbValue((Jsonb *) config, &json_value_config); 197 | json_iterator = JsonbIteratorInit(json_value_config.val.binary.data); 198 | iter_token = JsonbIteratorNext(&json_iterator, &v, false); 199 | Assert(iter_token == WJB_BEGIN_OBJECT); 200 | 201 | while (iter_token != WJB_END_OBJECT) 202 | { 203 | if (skip_next_value && iter_token == WJB_VALUE) 204 | { 205 | skip_next_value = false; 206 | goto next_iteration; 207 | } 208 | if (iter_token == WJB_KEY) 209 | { 210 | Assert(v.type == jbvString); 211 | if (!strcmp(v.val.string.val, func)) 212 | { 213 | skip_next_value = true; 214 | goto next_iteration; 215 | } 216 | } 217 | 218 | 219 | pushJsonbValue(&parse_state, iter_token, 220 | iter_token < WJB_BEGIN_ARRAY || 221 | (iter_token == WJB_BEGIN_ARRAY && 222 | v.val.array.rawScalar) ? 
&v : NULL); 223 | next_iteration: 224 | iter_token = JsonbIteratorNext(&json_iterator, &v, false); 225 | } 226 | 227 | /* we do it to end iteration correctly */ 228 | iter_token = JsonbIteratorNext(&json_iterator, &v, false); 229 | Assert(iter_token == WJB_DONE); 230 | 231 | result_config = JsonbValueToJsonb(pushJsonbValue(&parse_state, WJB_END_OBJECT, NULL)); 232 | WriteJsonbToConfigFile(result_config); 233 | pfree(result_config); 234 | pfree(config); 235 | } 236 | -------------------------------------------------------------------------------- /src/json_to_jsonbvalue_parser.c: -------------------------------------------------------------------------------- 1 | #include "postgres.h" 2 | #include "common/jsonapi.h" 3 | #include "utils/fmgrprotos.h" 4 | #include "mb/pg_wchar.h" 5 | 6 | #include "json_to_jsonbvalue_parser.h" 7 | 8 | #if PG_MAJORVERSION_NUM > 15 9 | #define JSON_PARSE_ERROR_TYPE JsonParseErrorType 10 | #define RETURN_SUCCESS JSON_SUCCESS 11 | #define RETURN_ERROR JSON_SEM_ACTION_FAILED 12 | #else 13 | #define JSON_PARSE_ERROR_TYPE void 14 | #define RETURN_SUCCESS /* empty */ 15 | #define RETURN_ERROR /* empty */ 16 | 17 | #define IsA(nodeptr,_type_) (nodeTag(nodeptr) == T_##_type_) 18 | 19 | static bool 20 | DirectInputFunctionCallSafe(PGFunction func, char *str, 21 | Oid typioparam, int32 typmod, 22 | fmNodePtr escontext, 23 | Datum *result) 24 | { 25 | LOCAL_FCINFO(fcinfo, 3); 26 | 27 | if (str == NULL) 28 | { 29 | *result = (Datum) 0; /* just return null result */ 30 | return true; 31 | } 32 | 33 | InitFunctionCallInfoData(*fcinfo, NULL, 3, InvalidOid, escontext, NULL); 34 | 35 | fcinfo->args[0].value = CStringGetDatum(str); 36 | fcinfo->args[0].isnull = false; 37 | fcinfo->args[1].value = ObjectIdGetDatum(typioparam); 38 | fcinfo->args[1].isnull = false; 39 | fcinfo->args[2].value = Int32GetDatum(typmod); 40 | fcinfo->args[2].isnull = false; 41 | 42 | *result = (*func) (fcinfo); 43 | 44 | /* Otherwise, shouldn't get null result */ 45 | if 
(fcinfo->isnull) 46 | elog(ERROR, "input function %p returned NULL", (void *) func); 47 | 48 | return true; 49 | } 50 | 51 | #endif 52 | 53 | typedef struct JsonbInState 54 | { 55 | JsonbParseState *parseState; 56 | JsonbValue *res; 57 | } JsonbInState; 58 | 59 | static JSON_PARSE_ERROR_TYPE jsonb_in_object_start(void *pstate); 60 | static JSON_PARSE_ERROR_TYPE jsonb_in_object_end(void *pstate); 61 | static JSON_PARSE_ERROR_TYPE jsonb_in_array_start(void *pstate); 62 | static JSON_PARSE_ERROR_TYPE jsonb_in_array_end(void *pstate); 63 | static JSON_PARSE_ERROR_TYPE jsonb_in_object_field_start(void *pstate, char *fname, bool isnull); 64 | static JSON_PARSE_ERROR_TYPE jsonb_in_scalar(void *pstate, char *token, JsonTokenType tokentype); 65 | 66 | JsonbValue * 67 | jsonToJsonbValue(char *json, size_t len) 68 | { 69 | JsonLexContext *lex; 70 | JsonbInState state; 71 | JsonSemAction sem; 72 | JsonbValue *result; 73 | 74 | memset(&state, 0, sizeof(state)); 75 | memset(&sem, 0, sizeof(sem)); 76 | #if PG_MAJORVERSION_NUM > 16 77 | lex = palloc0(sizeof(JsonLexContext)); 78 | makeJsonLexContextCstringLen(lex, json, len, GetDatabaseEncoding(), true); 79 | #else 80 | lex = makeJsonLexContextCstringLen(json, len, GetDatabaseEncoding(), true); 81 | #endif 82 | sem.semstate = (void *) &state; 83 | 84 | sem.object_start = jsonb_in_object_start; 85 | sem.array_start = jsonb_in_array_start; 86 | sem.object_end = jsonb_in_object_end; 87 | sem.array_end = jsonb_in_array_end; 88 | sem.scalar = jsonb_in_scalar; 89 | sem.object_field_start = jsonb_in_object_field_start; 90 | 91 | if (pg_parse_json(lex, &sem) != JSON_SUCCESS) 92 | { 93 | result = NULL; 94 | } 95 | /* after parsing, the item member has the composed jsonb structure */ 96 | result = state.res; 97 | pfree(lex); 98 | 99 | return result; 100 | } 101 | 102 | 103 | static JSON_PARSE_ERROR_TYPE 104 | jsonb_in_object_start(void *pstate) 105 | { 106 | JsonbInState *_state = (JsonbInState *) pstate; 107 | 108 | _state->res = 
pushJsonbValue(&_state->parseState, WJB_BEGIN_OBJECT, NULL); 109 | 110 | return RETURN_SUCCESS; 111 | } 112 | 113 | static JSON_PARSE_ERROR_TYPE 114 | jsonb_in_object_end(void *pstate) 115 | { 116 | JsonbInState *_state = (JsonbInState *) pstate; 117 | 118 | _state->res = pushJsonbValue(&_state->parseState, WJB_END_OBJECT, NULL); 119 | 120 | return RETURN_SUCCESS; 121 | } 122 | 123 | static JSON_PARSE_ERROR_TYPE 124 | jsonb_in_array_start(void *pstate) 125 | { 126 | JsonbInState *_state = (JsonbInState *) pstate; 127 | 128 | _state->res = pushJsonbValue(&_state->parseState, WJB_BEGIN_ARRAY, NULL); 129 | 130 | return RETURN_SUCCESS; 131 | } 132 | 133 | static JSON_PARSE_ERROR_TYPE 134 | jsonb_in_array_end(void *pstate) 135 | { 136 | JsonbInState *_state = (JsonbInState *) pstate; 137 | 138 | _state->res = pushJsonbValue(&_state->parseState, WJB_END_ARRAY, NULL); 139 | 140 | return RETURN_SUCCESS; 141 | } 142 | 143 | static JSON_PARSE_ERROR_TYPE 144 | jsonb_in_object_field_start(void *pstate, char *fname, bool isnull) 145 | { 146 | JsonbInState *_state = (JsonbInState *) pstate; 147 | JsonbValue v; 148 | 149 | Assert(fname != NULL); 150 | v.type = jbvString; 151 | v.val.string.len = strlen(fname); 152 | v.val.string.val = fname; 153 | 154 | _state->res = pushJsonbValue(&_state->parseState, WJB_KEY, &v); 155 | 156 | return RETURN_SUCCESS; 157 | } 158 | 159 | /* 160 | * For jsonb we always want the de-escaped value - that's what's in token 161 | */ 162 | static JSON_PARSE_ERROR_TYPE 163 | jsonb_in_scalar(void *pstate, char *token, JsonTokenType tokentype) 164 | { 165 | JsonbInState *_state = (JsonbInState *) pstate; 166 | JsonbValue v; 167 | Datum numd; 168 | 169 | switch (tokentype) 170 | { 171 | 172 | case JSON_TOKEN_STRING: 173 | Assert(token != NULL); 174 | v.type = jbvString; 175 | v.val.string.len = strlen(token); 176 | v.val.string.val = token; 177 | break; 178 | case JSON_TOKEN_NUMBER: 179 | 180 | /* 181 | * No need to check size of numeric values, because 
maximum 182 | * numeric size is well below the JsonbValue restriction 183 | */ 184 | Assert(token != NULL); 185 | v.type = jbvNumeric; 186 | if (!DirectInputFunctionCallSafe(numeric_in, token, 187 | InvalidOid, -1, 188 | NULL, 189 | &numd)) 190 | return RETURN_ERROR; 191 | v.val.numeric = DatumGetNumeric(numd); 192 | break; 193 | case JSON_TOKEN_TRUE: 194 | v.type = jbvBool; 195 | v.val.boolean = true; 196 | break; 197 | case JSON_TOKEN_FALSE: 198 | v.type = jbvBool; 199 | v.val.boolean = false; 200 | break; 201 | case JSON_TOKEN_NULL: 202 | v.type = jbvNull; 203 | break; 204 | default: 205 | /* should not be possible */ 206 | elog(ERROR, "invalid json token type"); 207 | break; 208 | } 209 | 210 | if (_state->parseState == NULL) 211 | { 212 | /* single scalar */ 213 | JsonbValue va; 214 | 215 | va.type = jbvArray; 216 | va.val.array.rawScalar = true; 217 | va.val.array.nElems = 1; 218 | 219 | _state->res = pushJsonbValue(&_state->parseState, WJB_BEGIN_ARRAY, &va); 220 | _state->res = pushJsonbValue(&_state->parseState, WJB_ELEM, &v); 221 | _state->res = pushJsonbValue(&_state->parseState, WJB_END_ARRAY, NULL); 222 | } 223 | else 224 | { 225 | JsonbValue *o = &_state->parseState->contVal; 226 | 227 | switch (o->type) 228 | { 229 | case jbvArray: 230 | _state->res = pushJsonbValue(&_state->parseState, WJB_ELEM, &v); 231 | break; 232 | case jbvObject: 233 | _state->res = pushJsonbValue(&_state->parseState, WJB_VALUE, &v); 234 | break; 235 | default: 236 | elog(ERROR, "unexpected parent of nested structure"); 237 | } 238 | } 239 | 240 | return RETURN_SUCCESS; 241 | } 242 | -------------------------------------------------------------------------------- /tests/scripts/init.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE users ( 2 | id SERIAL PRIMARY KEY, 3 | first_name VARCHAR(100) NOT NULL, 4 | last_name VARCHAR(100), 5 | email VARCHAR(255) UNIQUE NOT NULL, 6 | phone_number VARCHAR(20), 7 | created_at TIMESTAMP 
-- Add p_quantity units of a product to an order.
--
-- Looks up the product's current price and stock, rejects unknown products
-- and requests exceeding the stock, then inserts the order item, decrements
-- the product stock and adds the item cost to the order total.
CREATE OR REPLACE FUNCTION add_product_to_order(
    p_order_id INT,
    p_product_id INT,
    p_quantity INT)
RETURNS VOID AS $$
DECLARE
    v_price DECIMAL(10, 2);
    v_stock INT;
BEGIN

    SELECT price, stock INTO v_price, v_stock
    FROM products
    WHERE id = p_product_id;

    -- SELECT INTO leaves both targets NULL when no row matches; the stock
    -- comparison below would then be NULL (not true) and the INSERT would
    -- fail later on the NOT NULL unit_price column with an unrelated error.
    IF NOT FOUND THEN
        RAISE EXCEPTION 'Product with ID % does not exist', p_product_id;
    END IF;

    IF v_stock < p_quantity THEN
        RAISE EXCEPTION 'Insufficient stock for product ID % (available: %, requested: %)',
            p_product_id, v_stock, p_quantity;
    END IF;


    INSERT INTO order_items (order_id, product_id, quantity, unit_price)
    VALUES (p_order_id, p_product_id, p_quantity, v_price);


    UPDATE products
    SET stock = stock - p_quantity
    WHERE id = p_product_id;


    UPDATE orders
    SET total_amount = total_amount + (v_price * p_quantity)
    WHERE id = p_order_id;
END;
$$ LANGUAGE plpgsql;
get_low_stock_products(p_threshold INT DEFAULT 5) 152 | RETURNS TABLE ( 153 | product_id INT, 154 | product_name VARCHAR(200), 155 | current_stock INT, 156 | product_price DECIMAL(10, 2) 157 | ) AS $$ 158 | BEGIN 159 | RETURN QUERY 160 | SELECT 161 | id AS product_id, 162 | name AS product_name, 163 | stock AS current_stock, 164 | price AS product_price 165 | FROM products 166 | WHERE stock < p_threshold 167 | ORDER BY stock ASC; 168 | END; 169 | $$ LANGUAGE plpgsql; 170 | 171 | 172 | 173 | CREATE OR REPLACE FUNCTION validate_user_exists(p_user_id INT) 174 | RETURNS BOOLEAN AS $$ 175 | DECLARE 176 | user_count INT; 177 | BEGIN 178 | SELECT COUNT(*) INTO user_count FROM users WHERE id = p_user_id; 179 | RETURN user_count > 0; 180 | END; 181 | $$ LANGUAGE plpgsql; 182 | 183 | 184 | CREATE OR REPLACE FUNCTION create_new_order(p_user_id INT) 185 | RETURNS INT AS $$ 186 | DECLARE 187 | new_order_id INT; 188 | BEGIN 189 | IF NOT validate_user_exists(p_user_id) THEN 190 | RAISE EXCEPTION 'User with ID % does not exist', p_user_id; 191 | END IF; 192 | 193 | INSERT INTO orders (user_id, total_amount) 194 | VALUES (p_user_id, 0) 195 | RETURNING id INTO new_order_id; 196 | 197 | RETURN new_order_id; 198 | END; 199 | $$ LANGUAGE plpgsql; 200 | 201 | 202 | 203 | CREATE OR REPLACE FUNCTION process_complete_order( 204 | p_user_id INT, 205 | p_product_ids INT[], 206 | p_quantities INT[] 207 | ) RETURNS INT AS $$ 208 | DECLARE 209 | v_order_id INT; 210 | v_product_id INT; 211 | v_quantity INT; 212 | i INT; 213 | BEGIN 214 | 215 | IF array_length(p_product_ids, 1) != array_length(p_quantities, 1) THEN 216 | RAISE EXCEPTION 'Product IDs and quantities arrays must have the same length'; 217 | END IF; 218 | 219 | v_order_id := create_new_order(p_user_id); 220 | 221 | FOR i IN 1..array_length(p_product_ids, 1) LOOP 222 | v_product_id := p_product_ids[i]; 223 | v_quantity := p_quantities[i]; 224 | 225 | PERFORM add_product_to_order( 226 | v_order_id, 227 | v_product_id, 228 | v_quantity 
229 | ); 230 | END LOOP; 231 | 232 | UPDATE orders 233 | SET total_amount = calculate_order_total(v_order_id) 234 | WHERE id = v_order_id; 235 | 236 | RETURN v_order_id; 237 | EXCEPTION 238 | WHEN OTHERS THEN 239 | 240 | IF v_order_id IS NOT NULL THEN 241 | PERFORM update_order_status(v_order_id, 'CANCELLED'); 242 | END IF; 243 | RAISE; 244 | END; 245 | $$ LANGUAGE plpgsql; -------------------------------------------------------------------------------- /src/uprobe_message_buffer.c: -------------------------------------------------------------------------------- 1 | #include "postgres.h" 2 | #include "utils/wait_event.h" 3 | #include "storage/shmem.h" 4 | #include "storage/lwlock.h" 5 | #include "storage/condition_variable.h" 6 | 7 | #include "uprobe_message_buffer.h" 8 | 9 | #define MESSAGE_BUFFER_EMPTY_SPACE_WAIT (ESTIMATE_MESSAGE_SIZE * 64) 10 | 11 | 12 | 13 | typedef struct MessageBuffer 14 | { 15 | LWLock *lock; 16 | ConditionVariable cond; 17 | pg_atomic_uint32 numMessages; 18 | pg_atomic_uint32 freeSpace; 19 | volatile bool deleted; 20 | Message messages[FLEXIBLE_ARRAY_MEMBER]; 21 | } MessageBuffer; 22 | 23 | 24 | MessageBuffer *messageBuffer; 25 | 26 | 27 | static bool WaitUntillMessageBufferHasSpace(void); 28 | static bool WaitUntilMessageBufferNotEmpty(void); 29 | static void EndMessageBufferOperation(uint32 numMessages, uint32 freeSpace); 30 | 31 | void 32 | MessageBufferRequest(void) 33 | { 34 | RequestAddinShmemSpace(MAXALIGN(sizeof(MessageBuffer) + MESSAGEBUFFER_SIZE)); 35 | RequestNamedLWLockTranche("pg_uprobe collect lock", 1); 36 | } 37 | 38 | 39 | void 40 | MessageBufferInit(void) 41 | { 42 | bool found; 43 | 44 | LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); 45 | 46 | messageBuffer = ShmemInitStruct("pg_uprobe messageBuffer", sizeof(MessageBuffer) + MESSAGEBUFFER_SIZE, 47 | &found); 48 | LWLockRelease(AddinShmemInitLock); 49 | if (!found) 50 | { 51 | pg_atomic_write_u32(&messageBuffer->numMessages, 0); 52 | 
ConditionVariableInit(&messageBuffer->cond); 53 | messageBuffer->deleted = false; 54 | messageBuffer->lock = &(GetNamedLWLockTranche("pg_uprobe collect lock"))->lock; 55 | pg_atomic_write_u32(&messageBuffer->freeSpace, MESSAGEBUFFER_SIZE); 56 | } 57 | 58 | } 59 | 60 | static void 61 | EndMessageBufferOperation(uint32 numMessages, uint32 freeSpace) 62 | { 63 | pg_atomic_write_u32(&messageBuffer->numMessages, numMessages); 64 | pg_atomic_write_u32(&messageBuffer->freeSpace, freeSpace); 65 | 66 | LWLockRelease(messageBuffer->lock); 67 | ConditionVariableBroadcast(&messageBuffer->cond); 68 | } 69 | 70 | /* 71 | *If returns true messageBuffer->lock is acquired in LW_EXCLUSIVE 72 | *caller must call EndMessageBufferOperation to release it. 73 | *Else no lock is acquired and message buffer is deleted. 74 | */ 75 | static bool 76 | WaitUntillMessageBufferHasSpace(void) 77 | { 78 | uint32 freeSpaceupdated; 79 | bool isLockTaken = false; 80 | 81 | ConditionVariablePrepareToSleep(&messageBuffer->cond); 82 | freeSpaceupdated = pg_atomic_read_u32(&messageBuffer->freeSpace); 83 | do 84 | { 85 | if (freeSpaceupdated < MESSAGE_BUFFER_EMPTY_SPACE_WAIT && !messageBuffer->deleted) 86 | { 87 | ConditionVariableSleep(&messageBuffer->cond, WAIT_EVENT_PG_SLEEP); 88 | freeSpaceupdated = pg_atomic_read_u32(&messageBuffer->freeSpace); 89 | } 90 | LWLockAcquire(messageBuffer->lock, LW_EXCLUSIVE); 91 | freeSpaceupdated = pg_atomic_read_u32(&messageBuffer->freeSpace); 92 | 93 | if (freeSpaceupdated > MESSAGE_BUFFER_EMPTY_SPACE_WAIT) 94 | { 95 | isLockTaken = true; 96 | break; 97 | } 98 | 99 | LWLockRelease(messageBuffer->lock); 100 | } while (!messageBuffer->deleted); 101 | ConditionVariableCancelSleep(); 102 | return isLockTaken; 103 | } 104 | 105 | int 106 | MessageBufferPut(const Message *mes, uint32 n, char *symbol) 107 | { 108 | uint32 numToStore = 0; 109 | uint32 bytesToStore = 0; 110 | uint32 freeSpace; 111 | uint32 numInBuffer; 112 | const Message *currentMessage = mes; 113 | Message 
*lastMessageInbuffer; 114 | size_t symbolLen; 115 | 116 | if (messageBuffer->deleted || n == 0) 117 | return n; 118 | 119 | if (!WaitUntillMessageBufferHasSpace()) 120 | return n; 121 | 122 | freeSpace = pg_atomic_read_u32(&messageBuffer->freeSpace); 123 | lastMessageInbuffer = (Message *) (((char *) messageBuffer->messages) + (MESSAGEBUFFER_SIZE - freeSpace)); 124 | numInBuffer = pg_atomic_read_u32(&messageBuffer->numMessages); 125 | 126 | symbolLen = strlen(symbol); 127 | lastMessageInbuffer->type = MESSAGE_SYMBOL; 128 | lastMessageInbuffer->size = (uint16) symbolLen + sizeof(Message); 129 | /* + 1 is used to skip type and size fields in struct */ 130 | memcpy(lastMessageInbuffer + 1, symbol, lastMessageInbuffer->size); 131 | 132 | numInBuffer++; 133 | freeSpace -= lastMessageInbuffer->size; 134 | 135 | lastMessageInbuffer = (Message *) (((char *) lastMessageInbuffer) + lastMessageInbuffer->size); 136 | 137 | while (numToStore < n) 138 | { 139 | uint32 mesSize = (uint32) currentMessage->size; 140 | 141 | if (bytesToStore + mesSize > freeSpace) 142 | break; 143 | bytesToStore += mesSize; 144 | currentMessage = (const Message *) (((char *) mes) + bytesToStore); 145 | numToStore++; 146 | } 147 | 148 | memcpy(lastMessageInbuffer, mes, bytesToStore); 149 | 150 | EndMessageBufferOperation(numToStore + numInBuffer, freeSpace - bytesToStore); 151 | 152 | return numToStore; 153 | } 154 | 155 | 156 | /* 157 | *If returns true messageBuffer->lock is acquired in LW_EXCLUSIVE 158 | *caller must call EndMessageBufferOperation to release it. 159 | *Else no lock is acquired and message buffer is deleted. 
160 | */ 161 | static bool 162 | WaitUntilMessageBufferNotEmpty(void) 163 | { 164 | uint32 numMessagesUpdated; 165 | bool isLockTaken = false; 166 | 167 | ConditionVariablePrepareToSleep(&messageBuffer->cond); 168 | numMessagesUpdated = pg_atomic_read_u32(&messageBuffer->numMessages); 169 | do 170 | { 171 | if (!numMessagesUpdated && !messageBuffer->deleted) 172 | { 173 | ConditionVariableSleep(&messageBuffer->cond, WAIT_EVENT_PG_SLEEP); 174 | numMessagesUpdated = pg_atomic_read_u32(&messageBuffer->numMessages); 175 | } 176 | LWLockAcquire(messageBuffer->lock, LW_EXCLUSIVE); 177 | numMessagesUpdated = pg_atomic_read_u32(&messageBuffer->numMessages); 178 | 179 | if (numMessagesUpdated) 180 | { 181 | isLockTaken = true; 182 | break; 183 | } 184 | 185 | LWLockRelease(messageBuffer->lock); 186 | } while (!messageBuffer->deleted); 187 | ConditionVariableCancelSleep(); 188 | return isLockTaken; 189 | } 190 | 191 | /* 192 | *getting messages from messageBuffer with blocking if threre is no messages 193 | */ 194 | int 195 | MessageBufferGet(Message *mes, uint32 bufferSize) 196 | { 197 | uint32 numToGet = 0; 198 | uint32 numInTheBuffer; 199 | uint32 bytesToGet = 0; 200 | Message *currentMessage = messageBuffer->messages; 201 | uint32 freeSpace; 202 | 203 | if (messageBuffer->deleted) 204 | return 0; 205 | 206 | if (!WaitUntilMessageBufferNotEmpty()) 207 | return 0; 208 | 209 | numInTheBuffer = pg_atomic_read_u32(&messageBuffer->numMessages); 210 | freeSpace = pg_atomic_read_u32(&messageBuffer->freeSpace); 211 | while (numToGet < numInTheBuffer) 212 | { 213 | uint32 mesSize = (uint32) currentMessage->size; 214 | 215 | if (bytesToGet + mesSize > bufferSize) 216 | break; 217 | 218 | bytesToGet += mesSize; 219 | currentMessage = (Message *) (((char *) currentMessage) + mesSize); 220 | numToGet++; 221 | } 222 | 223 | memcpy(mes, messageBuffer->messages, bytesToGet); 224 | memmove(messageBuffer->messages, 225 | ((char *) messageBuffer->messages) + bytesToGet, 226 | 
MESSAGEBUFFER_SIZE - freeSpace - bytesToGet); 227 | 228 | EndMessageBufferOperation(numInTheBuffer - numToGet, freeSpace + bytesToGet); 229 | return numToGet; 230 | } 231 | 232 | /* We don't actualy delete MessageBuffer. We make all operations finish as soon as they get in */ 233 | void 234 | MessageBufferDelete() 235 | { 236 | messageBuffer->deleted = true; 237 | ConditionVariableBroadcast(&messageBuffer->cond); 238 | } 239 | -------------------------------------------------------------------------------- /src/trace_wait_events.c: -------------------------------------------------------------------------------- 1 | #include "postgres.h" 2 | #include 3 | #include "miscadmin.h" 4 | #include "utils/hsearch.h" 5 | #include "utils/wait_event.h" 6 | 7 | #include "uprobe_attach_interface.h" 8 | #include "trace_session.h" 9 | 10 | #include "trace_wait_events.h" 11 | 12 | typedef struct WaitEventData 13 | { 14 | uint32 eventId; 15 | uint64 count; 16 | uint64 timeSum; 17 | uint64 maxTime; 18 | } WaitEventData; 19 | 20 | 21 | static HTAB *waitEventsDataStorage = NULL; 22 | 23 | static WaitEventData *currentWaitEvent; 24 | 25 | static uint64 startWaitEventTime; 26 | 27 | static UprobeAttachInterface *attachedUprobes = NULL; 28 | 29 | 30 | static char *waitFuncs[] = { 31 | "pwrite", 32 | "ftruncate", 33 | "fsync", 34 | /* "PGSemaphoreLock", conflict with tracing locks on shared buffers, signalWaitEventStart(End) are used to inform about this event */ 35 | "pread", 36 | "fgets", 37 | "read", 38 | "write", 39 | "pwritev", 40 | "fdatasync", 41 | "pg_usleep", 42 | #if defined(HAVE_SYNC_FILE_RANGE) 43 | "sync_file_range", 44 | #endif 45 | 46 | #ifdef HAVE_POSIX_FALLOCATE 47 | "posix_fallocate", 48 | #endif 49 | 50 | #if defined(WAIT_USE_EPOLL) 51 | "epoll_wait" 52 | #elif defined(WAIT_USE_KQUEUE) 53 | "kevent" 54 | #elif defined(WAIT_USE_POLL) 55 | "poll" 56 | #endif 57 | }; 58 | 59 | #define sizeofWaitFuncs sizeof(waitFuncs) / sizeof(waitFuncs[0]) 60 | 61 | static uint32 
WaitEventDataHash(const void *key, Size keysize); 62 | static int WaitEventDataCmp(const void *key1, const void *key2, Size keysize); 63 | static void WaitEventsDatStorageInit(); 64 | static void WaitEventsDataStorageDelete(); 65 | 66 | static void TraceWaitEventInFunc(void *data); 67 | static void TraceWaitEventRetFunc(void *data); 68 | static void TraceWaitEventClean(UprobeAttachInterface *uprobe); 69 | static void TraceWaitEventWriteData(StringInfo stream, WaitEventData *data); 70 | 71 | static uint32 72 | WaitEventDataHash(const void *key, Size keysize) 73 | { 74 | Assert(keysize == sizeof(uint32)); 75 | return *((uint32 *) key); 76 | } 77 | 78 | 79 | static int 80 | WaitEventDataCmp(const void *key1, const void *key2, Size keysize) 81 | { 82 | int32 k1 = *((uint32 *) key1); 83 | int32 k2 = *((uint32 *) key2); 84 | 85 | Assert(keysize == sizeof(uint32)); 86 | 87 | if (k1 - k2) 88 | return 1; 89 | return 0; 90 | } 91 | 92 | 93 | static void 94 | WaitEventsDatStorageInit(void) 95 | { 96 | HASHCTL map_info; 97 | 98 | map_info.keysize = sizeof(uint32); 99 | map_info.entrysize = sizeof(WaitEventData); 100 | map_info.hash = &WaitEventDataHash; 101 | map_info.match = &WaitEventDataCmp; 102 | waitEventsDataStorage = hash_create("map for trace wait events", 1024, &map_info, HASH_ELEM | HASH_FUNCTION | HASH_COMPARE); 103 | } 104 | 105 | 106 | static void 107 | WaitEventsDataStorageDelete(void) 108 | { 109 | hash_destroy(waitEventsDataStorage); 110 | waitEventsDataStorage = NULL; 111 | } 112 | 113 | 114 | static void 115 | TraceWaitEventInFunc(void *data) 116 | { 117 | bool isFound; 118 | struct timespec time; 119 | 120 | if (*my_wait_event_info == 0) 121 | return; 122 | 123 | if (CritSectionCount == 0) 124 | { 125 | currentWaitEvent = hash_search(waitEventsDataStorage, my_wait_event_info, HASH_ENTER, &isFound); 126 | } 127 | else 128 | { 129 | currentWaitEvent = hash_search(waitEventsDataStorage, my_wait_event_info, HASH_FIND, &isFound); 130 | if (currentWaitEvent == 
NULL) 131 | return; 132 | } 133 | 134 | if (!isFound) 135 | { 136 | currentWaitEvent->count = 1; 137 | currentWaitEvent->maxTime = 0; 138 | currentWaitEvent->timeSum = 0; 139 | } 140 | else 141 | currentWaitEvent->count++; 142 | 143 | clock_gettime(CLOCK_MONOTONIC, &time); 144 | startWaitEventTime = time.tv_nsec + time.tv_sec * 1000000000L; 145 | } 146 | 147 | 148 | static void 149 | TraceWaitEventRetFunc(void *data) 150 | { 151 | struct timespec time; 152 | uint64 timeDiff; 153 | 154 | if (!currentWaitEvent) 155 | return; 156 | 157 | clock_gettime(CLOCK_MONOTONIC, &time); 158 | timeDiff = time.tv_nsec + time.tv_sec * 1000000000L - startWaitEventTime; 159 | 160 | currentWaitEvent->timeSum += timeDiff; 161 | if (currentWaitEvent->maxTime < timeDiff) 162 | currentWaitEvent->maxTime = timeDiff; 163 | 164 | currentWaitEvent = NULL; 165 | } 166 | 167 | /* used to tell that wait event has started if actual working function is used elsewhere */ 168 | void 169 | SignalWaitEventStart(uint64 time) 170 | { 171 | bool isFound; 172 | 173 | if (*my_wait_event_info == 0) 174 | return; 175 | 176 | if (CritSectionCount == 0) 177 | { 178 | currentWaitEvent = hash_search(waitEventsDataStorage, my_wait_event_info, HASH_ENTER, &isFound); 179 | } 180 | else 181 | { 182 | currentWaitEvent = hash_search(waitEventsDataStorage, my_wait_event_info, HASH_FIND, &isFound); 183 | if (currentWaitEvent == NULL) 184 | return; 185 | } 186 | 187 | if (!isFound) 188 | { 189 | currentWaitEvent->count = 1; 190 | currentWaitEvent->maxTime = 0; 191 | currentWaitEvent->timeSum = 0; 192 | } 193 | else 194 | currentWaitEvent->count++; 195 | 196 | startWaitEventTime = time; 197 | } 198 | 199 | /* used to tell that wait event has ended if actual working function is used elsewhere */ 200 | void 201 | SignalWaitEventEnd(uint64 time) 202 | { 203 | uint64 timeDiff; 204 | 205 | if (!currentWaitEvent) 206 | return; 207 | 208 | timeDiff = time - startWaitEventTime; 209 | 210 | currentWaitEvent->timeSum += timeDiff; 
211 | if (currentWaitEvent->maxTime < timeDiff) 212 | currentWaitEvent->maxTime = timeDiff; 213 | } 214 | 215 | 216 | static void 217 | TraceWaitEventClean(UprobeAttachInterface *uprobe) 218 | { 219 | 220 | } 221 | 222 | 223 | UprobeAttachInterface * 224 | TraceWaitEventsUprobesGet(size_t *resSize) 225 | { 226 | WaitEventsDatStorageInit(); 227 | attachedUprobes = palloc0(sizeof(UprobeAttachInterface) * sizeofWaitFuncs); 228 | for (int i = 0; i < sizeofWaitFuncs; i++) 229 | { 230 | attachedUprobes[i].cleanFunc = TraceWaitEventClean; 231 | attachedUprobes[i].inFunc = TraceWaitEventInFunc; 232 | attachedUprobes[i].retFunc = TraceWaitEventRetFunc; 233 | attachedUprobes[i].targetSymbol = waitFuncs[i]; 234 | } 235 | *resSize = sizeofWaitFuncs; 236 | return attachedUprobes; 237 | } 238 | 239 | 240 | void 241 | TraceWaitEventsUprobesClean(void) 242 | { 243 | if (!attachedUprobes) 244 | return; 245 | pfree(attachedUprobes); 246 | WaitEventsDataStorageDelete(); 247 | } 248 | 249 | 250 | void 251 | TraceWaitEventsClearStat(void) 252 | { 253 | WaitEventsDataStorageDelete(); 254 | WaitEventsDatStorageInit(); 255 | } 256 | 257 | 258 | static void 259 | TraceWaitEventWriteData(StringInfo stream, WaitEventData *data) 260 | { 261 | if (writeMode == TEXT_WRITE_MODE) 262 | { 263 | appendStringInfo(stream, 264 | "name=%s count=%lu timeSum=%lu nanosec maxTime=%lu nanosec\n", 265 | pgstat_get_wait_event(data->eventId), 266 | data->count, 267 | data->timeSum, 268 | data->maxTime 269 | ); 270 | } 271 | else 272 | { 273 | appendStringInfo(stream, 274 | " {\n" 275 | " \"name\": \"%s\",\n" 276 | " \"count\": %lu,\n" 277 | " \"timeSum\": \"%lu nanosec\",\n" 278 | " \"maxTime\": \"%lu nanosec\"\n" 279 | " },\n", 280 | pgstat_get_wait_event(data->eventId), 281 | data->count, 282 | data->timeSum, 283 | data->maxTime 284 | ); 285 | } 286 | } 287 | 288 | 289 | bool 290 | TraceWaitEventDumpStat(StringInfo out) 291 | { 292 | HASH_SEQ_STATUS mapIterator; 293 | WaitEventData *mapEntry; 294 | bool 
hasInfo = false; 295 | 296 | if (!waitEventsDataStorage) 297 | return hasInfo; 298 | 299 | if (writeMode == JSON_WRITE_MODE) 300 | appendStringInfo(out, "[\n"); 301 | 302 | hash_seq_init(&mapIterator, waitEventsDataStorage); 303 | mapEntry = (WaitEventData *) hash_seq_search(&mapIterator); 304 | while (mapEntry) 305 | { 306 | TraceWaitEventWriteData(out, mapEntry); 307 | 308 | mapEntry = (WaitEventData *) hash_seq_search(&mapIterator); 309 | hasInfo = true; 310 | } 311 | if (writeMode == JSON_WRITE_MODE && hasInfo) 312 | { 313 | /* delete last ',' in array */ 314 | out->data[out->len - 2] = ' '; 315 | appendStringInfo(out, "]\n"); 316 | } 317 | WaitEventsDataStorageDelete(); 318 | WaitEventsDatStorageInit(); 319 | 320 | return hasInfo; 321 | } 322 | -------------------------------------------------------------------------------- /doc/profile_func.md: -------------------------------------------------------------------------------- 1 | # Профилирование функций PostgreSQL 2 | 3 | - [Введение](#введение) 4 | - [Типы проб](#типы-проб) 5 | - [Подходящие функции для мониторинга](#подходящие-функции-для-мониторинга) 6 | - [Установка пробы](#установка-пробы) 7 | - [Информация о пробах](#информация-о-пробах) 8 | - [Показать установленные пробы](#показать-установленные-пробы) 9 | - [Результат профилирования для текущего сеанса](#результат-профилирования-для-текущего-сеанса) 10 | - [TIME. Время выполнения функции](#time-время-выполнения-функции) 11 | - [HIST. Гистограмма времени выполнения функции](#hist-гистограмма-времени-выполнения-функции) 12 | - [Результат профилирования для всех сеансов](#результат-профилирования-для-всех-сеансов) 13 | - [Получить результат профилирования](#получить-результат-профилирования) 14 | - [Удаление пробы](#удаление-пробы) 15 | 16 | 17 | ## Введение 18 | 19 | Расширение позволяет устанавливать динамические пробы на функции PostgreSQL в user space и детально исследовать внутреннюю работу СУБД. 
20 | 21 | ### Типы проб 22 | 23 | | Проб | Имя | Описание | 24 | |--------|------|-----------| 25 | | TIME | Время выполнения функции | Данный тип проб позволяет отслеживать среднее время выполнения функции и количество вызовов функции. | 26 | | HIST | Гистограмма времени выполнения функции| Собирается информация о времени выполнения функции в виде гистограммы. Это позволяет более детально изучить как работает функция при разных обстоятельствах. | 27 | | MEM | Информация о том как изменялась память при вызове функции | Собирается информация о том как меняется память до входа в функцию и после выхода из неё.| 28 | 29 | ### Подходящие функции для мониторинга 30 | Чтобы проверить, находится ли необходимая вам функция в ELF файле, можно воспользоваться следующими командами: 31 | 32 | **objdump**: 33 | ```shell 34 | objdump -T /opt/pgpro/ent-15/bin/postgres 35 | objdump -T /opt/pgpro/ent-15/bin/postgres | awk '{ print $7 }' 36 | ``` 37 | 38 | **readelf**: 39 | ```shell 40 | readelf -s -W /opt/pgpro/ent-15/bin/postgres 41 | readelf -s -W /opt/pgpro/ent-15/bin/postgres | awk '{ print $8 }' 42 | ``` 43 | 44 | **nm**: 45 | ```shell 46 | nm -D --demangle /opt/pgpro/ent-15/bin/postgres 47 | nm -D --demangle /opt/pgpro/ent-15/bin/postgres | awk '{ print $NF }' 48 | ``` 49 | Где /opt/pgpro/ent-15/bin/postgres - путь до установленных бинарников PostgreSQL. 50 | 51 | ## Установка пробы 52 | 53 | Чтобы установить пробу, необходимо воспользоваться следующей ```sql``` функцией: 54 | 55 | ```set_uprobe(IN func text, IN uprobe_type text, IN is_shared boolean);``` 56 | 57 | | Аргумент | Описание | 58 | |---|---| 59 | | func | Название функции из исполняемого файла или подгружаемой библиотеки | 60 | | uprobe_type | Тип пробы
  • TIME - Время выполнения функции
  • HIST - Гистограмма времени выполнения функции|
| 61 | | is_shared | Признак установки пробы на текущий сеанс или на все новые сеансы
  • false - Проб устанавливается только для текущего сеанса и вся собранная информация также хранится в этом сеансе
  • true - Проб устанавливается для всех новых сеансов. Все новые сеансы сбрасывают собранную информацию в общую память|
| 62 | 63 | При корректном завершении ```set_uprobe``` возвращает имя func. 64 | 65 | ## Информация о пробах 66 | 67 | ### Показать установленные пробы 68 | Чтобы посмотреть все установленные пробы, необходимо воспользоваться следующим запросом: 69 | 70 | ```select list_uprobes();``` 71 | 72 | Запрос возвращает набор строк, в каждой строке содержится запись, которая соответствует одному установленному пробу. 73 | 74 | Запись имеет вид: 75 | 76 | ```(func, uprobe_type, is_shared)``` 77 | 78 | | Аргумент | Описание | 79 | |---|---| 80 | | func | Название функции из исполняемого файла или подгружаемой библиотеки | 81 | | uprobe_type | см. [типы проб](#типы-проб) | 82 | | is_shared | Признак установки пробы на текущий сеанс или на все новые сеансы
  • false - Проб устанавливается только для текущего сеанса и вся собранная информация также хранится в этом сеансе
  • true - Проб устанавливается для всех новых сеансов. Все новые сеансы сбрасывают собранную информацию в общую память|
| 83 | 84 | ### Результат профилирования для текущего сеанса 85 | 86 | #### TIME. Время выполнения функции 87 | Чтобы получить информации о пробе, установленной на текущем сеансе, необходимо воспользоваться следующей ```sql``` функцией: 88 | 89 | ```stat_time_uprobe(IN func text)``` 90 | 91 | | Аргумент | Описание | 92 | |---|---| 93 | | func | Название функции из исполняемого файла или подгружаемой библиотеки | 94 | 95 | При корректном завершении возвращает строку вида: 96 | ```"calls: {количество вызовов функции} avg time: {среднее время выполнения функции} ns"```. 97 | 98 | #### HIST. Гистограмма времени выполнения функции 99 | Чтобы получить гистограмму времени выполнения функции, необходимо воспользоваться следующей ```sql``` функцией: 100 | 101 | 102 | ```stat_hist_uprobe( IN func text, IN start double precision, IN stop double precision, IN step double precision)``` 103 | 104 | | Аргумент | Описание | 105 | |---|---| 106 | | func | Название функции из исполняемого файла или подгружаемой библиотеки | 107 | | start | Значение в микросекундах, с которого нужно начать строить гистограмму. Если функция выполнялась меньше указанного значения, то эти времена не попадут в итоговую гистограмму | 108 | | stop | Значение в микросекундах, на котором нужно закончить строить гистограмму. Если функция выполнялась больше указанного значения, то эти времена не попадут в итоговую гистограмму | 109 | | step | Значение в микросекундах, шаг гистограммы | 110 | 111 | 112 | Если параметры start, stop, step для прошлой функции не известны заранее, то можно воспользоваться функцией, которая подберет их автоматически: 113 | 114 | 115 | ```stat_hist_uprobe( IN func text)``` 116 | 117 | | Аргумент | Описание | 118 | |---|---| 119 | | func | Название функции из исполняемого файла или подгружаемой библиотеки | 120 | 121 | 122 | При корректном завершении возвращает набор строк, который в ```psql``` будет выглядеть как гистограмма, аналогичная выводу ```bpftrace```. 
123 | 124 | - time_range - Интервал времени 125 | - hist_entry - Строка для красоты забитая '@', один символ за 2% из percent 126 | - percent - Процент измерений, который попал в данный интервал 127 | 128 | Примечание: Чтобы гистограмма была выравненной, нужно получать данные из функции, а не вызывать её напрямую. Правильный вариант: 129 | ```sql 130 | select * from stat_hist_uprobe('PortalStart'); 131 | ``` 132 | 133 | ### Результат профилирования для всех сеансов 134 | 135 | Основное отличие от профилирования собственного процесса заключается в том, что результат профилирования будет доступен на файловой системе, а не сразу в виде результата функций. 136 | 137 | #### Получить результат профилирования 138 | Чтобы получить результаты профилирования для всех сеансов, необходимо воспользоваться следующей функцией: 139 | 140 | ```dump_uprobe_stat(IN func text, IN should_empty_stat boolean)``` 141 | 142 | | Аргумент | Описание | 143 | |---|---| 144 | | func | Название функции из исполняемого файла или подгружаемой библиотеки | 145 | | should_empty_stat | Признак сброса собранной информации.
  • false - Собранная информация не удаляется и продолжает собираться
  • true - Собранная информация удаляется
| 146 | 147 | При корректном завершении в каталоге **pg_uprobe.data_dir** создается файл с собранной информацией. Для каждого [типа проб](#типы-проб) свой формат: 148 | 149 | | Тип пробы | Описание файла | 150 | |---|---| 151 | | TIME | Имя файла: TIME_{func}.txt
В файле строка вида:
```num calls: {количество вызовов функции} avg time: {среднее время выполнения функции} nanosec```| 152 | | HIST | Имя файла: HIST_{func}.txt
В файле строки вида:
```time,count```
  • time - Время в наносекундах сколько работала функция
  • count - Сколько раз выполнялась функция с указанным временем
| 153 | | MEM | Имя файла: MEM_{func}.txt
В файле строки вида:
```memory,count```
  • memory - Размер в байтах на сколько менялась выделенная память до входа в функцию и после неё
  • count - Сколько раз выполнялась функция с указанным изменением памяти
| 154 | 155 | ## Удаление пробы 156 | Чтобы удалить пробу, необходимо воспользоваться следующей функцией: 157 | 158 | ```delete_uprobe(IN func text, IN should_write_stat boolean)``` 159 | 160 | | Аргумент | Описание | 161 | |---|---| 162 | | func | Название функции из исполняемого файла или подгружаемой библиотеки | 163 | | should_write_stat | Только когда профилируем множество сеансов. Если true, то перед удалением пробы собирается результаты профилирования для всех сеансов аналогично функции | 164 | 165 | Важно отметить, что сам проб удаляется только на текущем сеансе. С остальных сеансов проб не будет удален, но на новых уже появляться не будет. -------------------------------------------------------------------------------- /src/lockmanager_trace.c: -------------------------------------------------------------------------------- 1 | #include "postgres.h" 2 | #include 3 | #include "utils/hsearch.h" 4 | #include "storage/lwlock.h" 5 | 6 | #include "uprobe_message_buffer.h" 7 | #include "trace_file.h" 8 | 9 | #include "lockmanager_trace.h" 10 | 11 | #define INVALID_PARTITION -1 12 | 13 | typedef struct LockManagerMapKey 14 | { 15 | int partition; 16 | uint64 time; 17 | } LockManagerMapKey; 18 | 19 | 20 | typedef struct LockManagerMapEntry 21 | { 22 | LockManagerMapKey key; 23 | uint64 count; 24 | } LockManagerMapEntry; 25 | 26 | 27 | typedef struct LockManagerStorage 28 | { 29 | UprobeStorage base; 30 | 31 | HTAB *map; 32 | } LockManagerStorage; 33 | 34 | 35 | typedef struct LockManagerMessage 36 | { 37 | Message base; 38 | 39 | LockManagerMapEntry entry; 40 | } LockManagerMessage; 41 | 42 | 43 | typedef struct LockManagerData 44 | { 45 | HTAB *map; 46 | } LockManagerData; 47 | 48 | 49 | /* lock Acquire data */ 50 | static int partition; 51 | static uint64 acquireTime; 52 | 53 | 54 | static void LWLockAcquireInFunc(void *data, LWLock *lock); 55 | static void LWLockAcquireRetFunc(void *data); 56 | static void LWLockAcquireClean(UprobeAttachInterface *this); 57 | 
static HTAB *CreateLockManagerHashMap(void); 58 | static void LWLockReleaseInFunc(LockManagerData *data); 59 | static void LWLockReleaseRetFunc(LockManagerData *data); 60 | static void LWLockReleaseTimedCallback(UprobeAttachInterface *this); 61 | static void LWLockReleaseClean(UprobeAttachInterface *this); 62 | static void LockManagerStoragePutData(LockManagerStorage *storage, LockManagerMessage *mes); 63 | static void LockManagerStorageWriteStat(LockManagerStorage *storage, bool shouldClearStat); 64 | static void LockManagerStorageDelete(LockManagerStorage *storage, bool shouldWriteStat); 65 | 66 | 67 | static void 68 | LWLockAcquireInFunc(void *data, LWLock *lock) 69 | { 70 | LWLockPadded *l; 71 | 72 | partition = INVALID_PARTITION; 73 | if (lock->tranche != LWTRANCHE_LOCK_MANAGER) 74 | return; 75 | 76 | l = MainLWLockArray + LOCK_MANAGER_LWLOCK_OFFSET; 77 | for (int id = 0; id < NUM_LOCK_PARTITIONS; id++, l++) 78 | { 79 | if (&l->lock == lock) 80 | partition = id; 81 | } 82 | } 83 | 84 | 85 | static void 86 | LWLockAcquireRetFunc(void *data) 87 | { 88 | struct timespec time; 89 | 90 | if (partition == INVALID_PARTITION) 91 | return; 92 | 93 | 94 | clock_gettime(CLOCK_MONOTONIC, &time); 95 | acquireTime = time.tv_nsec + time.tv_sec * 1000000000L; 96 | } 97 | 98 | 99 | static void 100 | LWLockAcquireClean(UprobeAttachInterface *this) 101 | { 102 | if (this == NULL) 103 | return; 104 | 105 | pfree(this->targetSymbol); 106 | pfree(this); 107 | } 108 | 109 | 110 | static HTAB * 111 | CreateLockManagerHashMap(void) 112 | { 113 | HTAB *map; 114 | HASHCTL map_info; 115 | 116 | map_info.keysize = sizeof(LockManagerMapKey); 117 | map_info.entrysize = sizeof(LockManagerMapEntry); 118 | map = hash_create("map for Lock Manager trace", 1024, &map_info, HASH_ELEM | HASH_BLOBS); 119 | return map; 120 | } 121 | 122 | 123 | static void 124 | LWLockReleaseInFunc(LockManagerData *data) 125 | { 126 | struct timespec time; 127 | uint64 timeDiff; 128 | LockManagerMapKey key; 129 | 
LockManagerMapEntry *entry; 130 | bool isFound; 131 | 132 | if (partition == INVALID_PARTITION) 133 | return; 134 | 135 | clock_gettime(CLOCK_MONOTONIC, &time); 136 | timeDiff = time.tv_nsec + time.tv_sec * 1000000000L - acquireTime; 137 | key.partition = partition; 138 | key.time = timeDiff / 100 + ((timeDiff % 100 >= 50) ? 1 : 0); 139 | 140 | entry = (LockManagerMapEntry *) hash_search(data->map, &key, HASH_ENTER_NULL, &isFound); 141 | if (likely(entry)) 142 | { 143 | if (isFound) 144 | entry->count++; 145 | else 146 | entry->count = 1; 147 | } 148 | } 149 | 150 | 151 | static void 152 | LWLockReleaseRetFunc(LockManagerData *data) 153 | { 154 | 155 | } 156 | 157 | 158 | static void 159 | LWLockReleaseTimedCallback(UprobeAttachInterface *this) 160 | { 161 | LockManagerData *data = (LockManagerData *) this->data; 162 | LockManagerMessage messages[1024]; 163 | uint32 currentIndex = 0; 164 | uint32 numSend = 0; 165 | HASH_SEQ_STATUS mapIterator; 166 | LockManagerMapEntry *mapEntry; 167 | 168 | hash_seq_init(&mapIterator, data->map); 169 | mapEntry = (LockManagerMapEntry *) hash_seq_search(&mapIterator); 170 | while (mapEntry) 171 | { 172 | messages[currentIndex].base.type = MESSAGE_CUSTOM; 173 | messages[currentIndex].base.size = sizeof(LockManagerMessage); 174 | messages[currentIndex].entry = *mapEntry; 175 | currentIndex++; 176 | if (currentIndex == 1024) 177 | { 178 | numSend = MessageBufferPut((Message *) messages, currentIndex, this->targetSymbol); 179 | if (numSend != currentIndex) 180 | { 181 | memmove(messages, &messages[numSend], (currentIndex - numSend) * sizeof(LockManagerMessage)); 182 | } 183 | currentIndex = currentIndex - numSend; 184 | } 185 | mapEntry = (LockManagerMapEntry *) hash_seq_search(&mapIterator); 186 | } 187 | while (currentIndex != 0) 188 | { 189 | numSend = MessageBufferPut((Message *) messages, currentIndex, this->targetSymbol); 190 | if (numSend != currentIndex) 191 | { 192 | memmove(messages, &messages[numSend], (currentIndex - 
numSend) * sizeof(LockManagerMessage)); 193 | } 194 | currentIndex = currentIndex - numSend; 195 | } 196 | 197 | hash_destroy(data->map); 198 | data->map = CreateLockManagerHashMap(); 199 | } 200 | 201 | 202 | static void 203 | LWLockReleaseClean(UprobeAttachInterface *this) 204 | { 205 | LockManagerData *data; 206 | 207 | if (!this) 208 | return; 209 | data = (LockManagerData *) this->data; 210 | hash_destroy(data->map); 211 | pfree(data); 212 | pfree(this->targetSymbol); 213 | pfree(this); 214 | } 215 | 216 | 217 | static void 218 | LockManagerStoragePutData(LockManagerStorage *storage, LockManagerMessage *mes) 219 | { 220 | LockManagerMapEntry *entry; 221 | bool isFound; 222 | 223 | entry = (LockManagerMapEntry *) hash_search(storage->map, &mes->entry.key, HASH_ENTER_NULL, &isFound); 224 | if (likely(entry)) 225 | { 226 | if (isFound) 227 | { 228 | entry->count += mes->entry.count; 229 | } 230 | else 231 | { 232 | entry->count = mes->entry.count; 233 | } 234 | } 235 | } 236 | 237 | 238 | static void 239 | LockManagerStorageWriteStat(LockManagerStorage *storage, bool shouldClearStat) 240 | { 241 | FILE *files[256]; 242 | char filePath[MAXPGPATH]; 243 | HASH_SEQ_STATUS mapIterator; 244 | LockManagerMapEntry *mapEntry; 245 | 246 | for (int i = 0; i < NUM_LOCK_PARTITIONS; i++) 247 | { 248 | sprintf(filePath, "%sLock_Manager_%d.txt", dataDir, i); 249 | files[i] = fopen(filePath, "w"); 250 | if (files[i] == NULL) 251 | { 252 | elog(LOG, "can't open file %s for writting", filePath); 253 | 254 | for (int j = 0; j < i; j++) 255 | fclose(files[j]); 256 | 257 | return; 258 | } 259 | fprintf(files[i], "time,count\n"); 260 | } 261 | 262 | hash_seq_init(&mapIterator, storage->map); 263 | mapEntry = (LockManagerMapEntry *) hash_seq_search(&mapIterator); 264 | while (mapEntry) 265 | { 266 | double time = (double) mapEntry->key.time / 10.0; 267 | 268 | fprintf(files[mapEntry->key.partition], "%.1lf,%lu\n", time, mapEntry->count); 269 | 270 | mapEntry = (LockManagerMapEntry *) 
hash_seq_search(&mapIterator); 271 | } 272 | 273 | for (int i = 0; i < NUM_LOCK_PARTITIONS; i++) 274 | fclose(files[i]); 275 | 276 | if (shouldClearStat) 277 | { 278 | hash_destroy(storage->map); 279 | 280 | storage->map = CreateLockManagerHashMap(); 281 | } 282 | } 283 | 284 | 285 | static void 286 | LockManagerStorageDelete(LockManagerStorage *storage, bool shouldWriteStat) 287 | { 288 | if (shouldWriteStat) 289 | LockManagerStorageWriteStat(storage, false); 290 | hash_destroy(storage->map); 291 | pfree(storage->base.symbol); 292 | pfree(storage); 293 | } 294 | 295 | 296 | UprobeAttachInterface * 297 | LWLockAcquireInit(const char *symbol) 298 | { 299 | UprobeAttachInterface *res = palloc(sizeof(UprobeAttachInterface)); 300 | 301 | res->inFunc = LWLockAcquireInFunc; 302 | res->retFunc = LWLockAcquireRetFunc; 303 | res->timedCallback = NULL; 304 | res->cleanFunc = LWLockAcquireClean; 305 | res->needRetVal = false; 306 | res->numArgs = 1; 307 | res->data = NULL; 308 | res->targetSymbol = pstrdup(symbol); 309 | return res; 310 | } 311 | 312 | UprobeAttachInterface * 313 | LWLockReleaseInit(const char *symbol) 314 | { 315 | UprobeAttachInterface *res = palloc(sizeof(UprobeAttachInterface)); 316 | LockManagerData *storage = palloc(sizeof(LockManagerData)); 317 | 318 | res->inFunc = LWLockReleaseInFunc; 319 | res->retFunc = LWLockReleaseRetFunc; 320 | res->timedCallback = LWLockReleaseTimedCallback; 321 | res->cleanFunc = LWLockReleaseClean; 322 | res->needRetVal = false; 323 | res->numArgs = 0; 324 | res->data = storage; 325 | res->targetSymbol = pstrdup(symbol); 326 | storage->map = CreateLockManagerHashMap(); 327 | return res; 328 | } 329 | 330 | UprobeStorage * 331 | LockManagerStorageInit(const char *symbol) 332 | { 333 | LockManagerStorage *storage = palloc(sizeof(LockManagerStorage)); 334 | 335 | storage->base.delete = (StorageDeleteFunc) LockManagerStorageDelete; 336 | storage->base.putData = (StoragePutDataFunc) LockManagerStoragePutData; 337 | 
storage->base.writeStat = (StorageWriteStat) LockManagerStorageWriteStat; 338 | storage->base.symbol = pstrdup(symbol); 339 | storage->map = CreateLockManagerHashMap(); 340 | 341 | return (UprobeStorage *) storage; 342 | } 343 | 344 | UprobeStorage * 345 | NullStorageInit(const char *symbol) 346 | { 347 | return NULL; 348 | } 349 | -------------------------------------------------------------------------------- /doc/trace_session.md: -------------------------------------------------------------------------------- 1 | # Трассирование сеансов 2 | 3 | - [Введение](#введение) 4 | - [Ограничения](#ограничения) 5 | - [Использование](#использование) 6 | - [Трассирование текущего сеанса](#трассирование-текущего-сеанса) 7 | - [Трассирование другого сеанса по PID](#трассирование-другого-сеанса-по-pid) 8 | - [Результат трассирования](#результат-трассирования) 9 | 10 | ## Введение 11 | 12 | Когда подключение к СУБД уже создано, с помощью расширения можно включить трассирование всех запросов, которые будут выполняться в рамках этого сеанса. Включить трассирование можно как для своего сеанса, используя функцию start_session_trace(), так и для трассирования других сеансов, например сеансов, созданных приложением. 13 | 14 | По собранной информации можно определить следующее: 15 | 16 | - Текст запроса 17 | - Время затраченное на разбор запроса 18 | - План запроса 19 | - Тип плана запроса(generic, custom) 20 | - Время, затраченное на планирование запроса 21 | - Время, затраченное на выполнение всего запроса 22 | - Время, затраченное на выполнение каждого узла плана 23 | - Время, затраченное на ожидание событий(wait events). Например: чтение файлов, ожидание блокировки 24 | - Блокировки в разделяемой памяти, захваченные во время выполнения запроса 25 | 26 | Эта информация будет собрана для каждого запроса, выполнявшегося в момент трассирования. 27 | 28 | ## Ограничения 29 | 30 | При трассировании сеансов время выполнения запросов может увеличиться. 
В наших измерениях скорость выполнения падает на ~5%. Поэтому не стоит оставлять трассирование сеансов на длительное время. Этот инструмент предназначен, в первую очередь, для исследования проблемы, а не её обнаружения. 31 | 32 | 33 | ## Использование 34 | 35 | ### Трассирование текущего сеанса 36 | Чтобы начать трассирование текущего сеанса, необходимо воспользоваться следующей ```sql``` функцией: 37 | 38 | ```start_session_trace();``` 39 | 40 | Чтобы завершить трассирование текущего сеанса и сформировать отчёт, необходимо воспользоваться следующей ```sql``` функцией: 41 | 42 | ```stop_session_trace();``` 43 | 44 | ### Трассирование другого сеанса по PID 45 | 46 | Чтобы начать трассирование сеанса, который запущен в другом процессе, необходимо воспользоваться следующей ```sql``` функцией: 47 | 48 | ```start_session_trace(IN pid INT);``` 49 | 50 | | Аргумент | Описание | 51 | |---|---| 52 | | pid | Идентификатор процесса, в котором необходимо включить трассирование сеанса | 53 | 54 | Примечание: Трассирование начнётся после завершения первого SQL-оператора. При этом сам первый SQL-оператор не будет записан в результирующий файл. 55 | 56 | Чтобы завершить трассирование сеанса, который запущен в другом процессе, необходимо воспользоваться следующей ```sql``` функцией: 57 | 58 | ```stop_session_trace(IN pid INT);``` 59 | 60 | | Аргумент | Описание | 61 | |---|---| 62 | | pid | Идентификатор процесса, в котором необходимо завершить трассирование сеанса | 63 | 64 | ## Результат трассирования 65 | Результат трассирования будет находиться в каталоге, указанном в настройке `pg_uprobe.data_dir` (по умолчанию `$PGDATA/pg_uprobe`). Название файла имеет следующий вид: `[Настройка pg_uprobe.trace_file_name]_[PID_процесса]`. 66 | 67 | Формат вывода информации для трассирования сеанса определяется настройкой `pg_uprobe.trace_write_mode`. 
Поддерживаемые значения: 68 | - json: Предпочтительный вариант, который сейчас поддерживается в первую очередь 69 | - text: Вариант, который подходит для анализа небольших сценариев, в котором информация представлена в удобном для чтения виде 70 | 71 | Пример результата трассирования простого запроса: [example_trace_session.md](./example_trace_session.md) 72 | 73 | Json schema: [trace_json_schema.json](./json_schema/trace_json_schema.json) 74 | 75 | На верхнем уровне json выглядит так: 76 | 77 | ``` json 78 | { 79 | "pid": 12345, // pid процесса, который трассируется 80 | "queries": [] // запросы, исполняемые процессом 81 | } 82 | ``` 83 | 84 | Далее речь пойдет о поле "queries" - это массив, где каждый объект представляет собой разбор одного выполненного запроса, массив отсортирован в порядке выполнения этих запросов. 85 | 86 | Пример: 87 | ```json 88 | [ 89 | { 90 | // Данные первого запроса 91 | }, 92 | { 93 | // Данные второго запроса 94 | } 95 | ] 96 | ``` 97 | Рассмотрим подробнее данные, полученные при трассировании одного запроса. 98 | 99 | ```json 100 | { 101 | "parsingTime": "70093 nanosec", // Время, затраченное на разбор SQL запроса 102 | "planningTime": "8521955 nanosec", // Время, затраченное на планирование SQL запроса 103 | "explain": object, // Вывод команды explain для запроса в json формате 104 | "planType": "custom", // Опциональное поле. Тип плана выбран для подготовленного запроса: generic или custom 105 | "LWLockPlanning": object, // Опциональное поле. 
Статистика блокировок страниц буферного кэша во время выполнения планирования запроса 106 | "executionStart": "2077:06:26T11:34:39.683", // Время начала работы исполнителя запросов в формате "гггг:мм:ддTчч:мм:сс.ссс" 107 | "executionEvents": array[object], // Узлы выполнения запроса, в хронологическом порядке выполнения 108 | "executionTime": "282504 nanosec", // Время, затраченное на выполнение запроса 109 | "executorNodeStatInPlan": object, // Вывод команды explain для запроса в json формате со статистикой выполнения каждого узла 110 | "exprNodeStat": array[object], // Статистика для вычисляемых выражений 111 | "params": object, // Опциональное поле. Параметры для подготовленного запроса 112 | "locksInsidePortalRun": object, // Опциональное поле. Статистика блокировок страниц буферного кэша во время выполнения запроса. Содержит объекты типа LWLockStat 113 | "locksOutsidePortalRun": object, // Опциональное поле. Статистика блокировок страниц буферного кэша до выполнения запроса или после. Содержит объекты типа LWLockStat 114 | "waitEventStat": object // Опциональное поле. 
Статистика по событиям ожидания за время выполнения запроса 115 | } 116 | ``` 117 | 118 | Рассмотрим детально поле `explain`: 119 | ```json 120 | { 121 | "Query Text": "select * from t where id = 1;", // Сам запрос 122 | "Plan": object, // Узлы плана 123 | /* Другие поля из explain с типом JSON*/ 124 | } 125 | ``` 126 | 127 | После выполнения планирования мы переходим к выполнению, во время которого поочерёдно выполняются узлы плана, для них пишется следующая статистика: 128 | ```json 129 | "executionEvents": [ 130 | { 131 | "node": "ScanState", // Название узла 132 | "explain": object , // Часть текста команды explain для конкретно этого узла плана 133 | "executeTime": "32256 nanosec", // Время выполнения узла 134 | "LWLockStat": object, // Объект типа LWLockStat 135 | } 136 | ] 137 | ``` 138 | Также в поле executionEvents попадают данные об исполнении подзапросов (например, при выполнении pl/pgsql функций), это такие же объекты, как и при трассировании обычных запросов. 139 | 140 | Для дальнейшего анализа рассмотрим объекты LWLockStat. В них собирается статистика блокировок LWLock на буферный кэш или просто на отдельные блокировки, которые мы ожидали. Чтобы включить отображение всех блокировок, которые брались в это время, необходимо воспользоваться настройкой `pg_uprobe.write_only_sleep_lwlocks_stat`. 141 | 142 | Данная статистика собирается для каждой страницы буферного кэша, к которой обращались во время выполнения той или иной операции. 
143 | ```json 144 | { 145 | "bufferTag": {// к какому объекту базы данных относится страница с диска 146 | "spcOid": 1663, // oid табличного пространства 147 | "spcName": "pg_default", // Имя табличного пространства 148 | "dbOid": 5, // oid базы данных 149 | "dbName": "postgres", // Имя базы данных 150 | "relNumber": 2662, // oid отношения 151 | "relName": "pg_catalog.pg_class_oid_index", // Полное имя отношения 152 | "relKind": "i", // Тип отношения сокращённо (в данном случае это индекс) 153 | "forkName": "main", // Тип файла (в данном случае файл с данными) 154 | "blockNumber": 3 // Порядковый номер блока в файле 155 | }, 156 | "exclusive": { // Статистика для взятия эксклюзивной блокировки 157 | "totalCalls": 0, // Количество взятий блокировки 158 | "sleepCount": 0, // Количество засыпаний при ожидании блокировки 159 | "sleepTimeSum": "0 nanosec", // Общее время ожидания 160 | "maxSleepTime": "0 nanosec" // Максимальное время одного ожидания 161 | }, 162 | "shared": { // Статистика для взятия разделяемой блокировки 163 | "totalCalls": 5, // Количество взятий блокировки 164 | "sleepCount": 0, // Количество засыпаний при ожидании блокировки 165 | "sleepTimeSum": "0 nanosec", // Общее время ожидания 166 | "maxSleepTime": "0 nanosec" // Максимальное время одного ожидания 167 | } 168 | } 169 | ``` 170 | 171 | После того как запрос выполнился, формируется объект `executorNodeStatInPlan`: 172 | ```json 173 | "executorNodeStatInPlan": { 174 | /* Другие поля из explain с типом JSON*/ 175 | "traceData": { 176 | "maxTime": 32256, // Максимальное время выполнения узла плана 177 | "totalCalls": 3, // Количество выполнений узла плана 178 | "totalTimeSum": 32980 // Суммарное время выполнений узла плана 179 | } 180 | } 181 | ``` 182 | 183 | Статистика для вычисляемых выражений выводится в объекте `exprNodeStat`: 184 | ```json 185 | "exprNodeStat": [ 186 | { 187 | "node": "Const", // Название узла выражения 188 | "totalCalls": 1, // Сколько раз вычислялось 189 | 
"totalTimeSum": "239 nanosec", // Суммарное время вычисления 190 | "maxTime": "239 nanosec" // Максимальное время вычисления 191 | } 192 | ] 193 | ``` 194 | 195 | Статистика по событиям ожидания за время выполнения запроса 196 | ```json 197 | "waitEventStat": [ 198 | { 199 | "name": "DataFileRead", // Название события ожидания 200 | "count": 5, // Количество произошедших событий 201 | "timeSum": "860555 nanosec", // Общее время ожидания 202 | "maxTime": "275268 nanosec" // Максимальное время одного ожидания 203 | } 204 | ] 205 | ``` -------------------------------------------------------------------------------- /src/uprobe_internal.c: -------------------------------------------------------------------------------- 1 | #include "postgres.h" 2 | #include "utils/memutils.h" 3 | 4 | #include "frida-gum.h" 5 | #include "uprobe_message_buffer.h" 6 | 7 | #include "uprobe_internal.h" 8 | 9 | /* frida */ 10 | typedef struct _UprobeListenerNoArgs UprobeListenerNoArgs; 11 | 12 | struct _UprobeListenerNoArgs 13 | { 14 | GObject parent; 15 | }; 16 | 17 | static void uprobe_listener_no_args_iface_init(gpointer g_iface, gpointer iface_data); 18 | #define UPROBE_TYPE_LISTENERNOARGS (uprobe_listener_no_args_get_type()) 19 | G_DECLARE_FINAL_TYPE(UprobeListenerNoArgs, uprobe_listener_no_args, UPROBE, LISTENERNOARGS, 20 | GObject) 21 | G_DEFINE_TYPE_EXTENDED(UprobeListenerNoArgs, uprobe_listener_no_args, G_TYPE_OBJECT, 0, 22 | G_IMPLEMENT_INTERFACE(GUM_TYPE_INVOCATION_LISTENER, 23 | uprobe_listener_no_args_iface_init)) 24 | 25 | 26 | 27 | 28 | typedef struct _UprobeListenerHasArgs UprobeListenerHasArgs; 29 | 30 | struct _UprobeListenerHasArgs 31 | { 32 | GObject parent; 33 | }; 34 | 35 | static void uprobe_listener_has_args_iface_init(gpointer g_iface, gpointer iface_data); 36 | #define UPROBE_TYPE_LISTENERHASARGS (uprobe_listener_has_args_get_type()) 37 | G_DECLARE_FINAL_TYPE(UprobeListenerHasArgs, uprobe_listener_has_args, UPROBE, LISTENERHASARGS, 38 | GObject) 39 | 
G_DEFINE_TYPE_EXTENDED(UprobeListenerHasArgs, uprobe_listener_has_args, G_TYPE_OBJECT, 0, 40 | G_IMPLEMENT_INTERFACE(GUM_TYPE_INVOCATION_LISTENER, 41 | uprobe_listener_has_args_iface_init)) 42 | 43 | 44 | 45 | /* end frida */ 46 | 47 | /* global */ 48 | 49 | MemoryContext UprobeMemoryContext = NULL; 50 | 51 | static GumInterceptor *interceptor = NULL; 52 | 53 | 54 | struct Uprobe 55 | { 56 | bool isAttached; 57 | GumInvocationListener *listener; 58 | UprobeAttachInterface *uprobeInterface; 59 | }; 60 | 61 | 62 | /* Functions typedefs for callings hooks*/ 63 | typedef void (*Function0Arg) (void *); 64 | typedef void (*Function1Arg) (void *, gpointer); 65 | typedef void (*Function2Arg) (void *, gpointer, gpointer); 66 | typedef void (*Function3Arg) (void *, gpointer, gpointer, gpointer); 67 | typedef void (*Function4Arg) (void *, gpointer, gpointer, gpointer, gpointer); 68 | typedef void (*Function5Arg) (void *, gpointer, gpointer, gpointer, gpointer, gpointer); 69 | typedef void (*Function6Arg) (void *, gpointer, gpointer, gpointer, gpointer, gpointer, gpointer); 70 | typedef void (*Function7Arg) (void *, gpointer, gpointer, gpointer, gpointer, gpointer, gpointer, gpointer); 71 | typedef void (*Function8Arg) (void *, gpointer, gpointer, gpointer, gpointer, gpointer, gpointer, gpointer, gpointer); 72 | 73 | 74 | void 75 | UprobeDelete(Uprobe *uprobe) 76 | { 77 | if (uprobe == NULL) 78 | return; 79 | 80 | if (uprobe->isAttached) 81 | gum_interceptor_detach(interceptor, uprobe->listener); 82 | 83 | if (uprobe->listener) 84 | g_object_unref(uprobe->listener); 85 | uprobe->uprobeInterface->cleanFunc(uprobe->uprobeInterface); 86 | pfree(uprobe); 87 | } 88 | 89 | 90 | UPROBE_INIT_RES 91 | UprobeInit(UprobeAttachInterface *uprobeAttach, Uprobe **uprobe) 92 | { 93 | Uprobe *result; 94 | int res; 95 | void *funcAddr; 96 | MemoryContext old; 97 | 98 | 99 | old = MemoryContextSwitchTo(UprobeMemoryContext); 100 | result = (Uprobe *) palloc0(sizeof(Uprobe)); 101 | 
result->uprobeInterface = uprobeAttach; 102 | 103 | MemoryContextSwitchTo(old); 104 | if (uprobeAttach->numArgs < 0 || uprobeAttach->numArgs > 8) 105 | { 106 | UprobeDelete(result); 107 | return INVALID_NUMBER_OF_ARGS; 108 | } 109 | funcAddr = gum_find_function(uprobeAttach->targetSymbol); 110 | if (funcAddr == NULL) 111 | { 112 | UprobeDelete(result); 113 | return CANNOT_FIND_SYMBOL; 114 | } 115 | gum_interceptor_begin_transaction(interceptor); 116 | if (uprobeAttach->numArgs == 0) 117 | result->listener = (GumInvocationListener *) g_object_new(uprobe_listener_no_args_get_type(), NULL); 118 | else 119 | result->listener = (GumInvocationListener *) g_object_new(uprobe_listener_has_args_get_type(), NULL); 120 | if (result->listener == NULL) 121 | { 122 | gum_interceptor_end_transaction(interceptor); 123 | UprobeDelete(result); 124 | return INTERNAL_ERROR; 125 | } 126 | 127 | res = gum_interceptor_attach(interceptor, funcAddr, result->listener, result, GUM_ATTACH_FLAGS_NONE); 128 | 129 | if (res < 0) 130 | { 131 | gum_interceptor_end_transaction(interceptor); 132 | UprobeDelete(result); 133 | return INTERNAL_ERROR; 134 | } 135 | 136 | gum_interceptor_end_transaction(interceptor); 137 | 138 | result->isAttached = true; 139 | *uprobe = result; 140 | return SUCCESS; 141 | } 142 | 143 | 144 | int 145 | UprobeCompare(Uprobe *uprobe, char *func) 146 | { 147 | return strcmp(uprobe->uprobeInterface->targetSymbol, func); 148 | } 149 | 150 | 151 | const char * 152 | UprobeGetFunc(Uprobe *uprobe) 153 | { 154 | return uprobe->uprobeInterface->targetSymbol; 155 | } 156 | 157 | 158 | void 159 | UprobeCallTimedCallback(Uprobe *uprobe) 160 | { 161 | if (uprobe->uprobeInterface->timedCallback) 162 | uprobe->uprobeInterface->timedCallback(uprobe->uprobeInterface); 163 | } 164 | 165 | 166 | const UprobeAttachInterface * 167 | UprobeGetAttachInterface(Uprobe *uprobe) 168 | { 169 | return uprobe->uprobeInterface; 170 | } 171 | 172 | 173 | void 174 | UprobeInternalInit(void) 175 | { 176 | 
UprobeMemoryContext = AllocSetContextCreate(TopMemoryContext, "uprobe global context", ALLOCSET_DEFAULT_SIZES); 177 | 178 | /* init frida */ 179 | gum_init(); 180 | interceptor = gum_interceptor_obtain(); 181 | } 182 | 183 | 184 | void 185 | UprobeInternalFini(void) 186 | { 187 | MemoryContextDelete(UprobeMemoryContext); 188 | 189 | /* fini frida */ 190 | g_object_unref(interceptor); 191 | gum_deinit(); 192 | } 193 | 194 | 195 | /* fida code */ 196 | 197 | /* arg getting could be slow, todo: make on pure cpu contexts without functions calls */ 198 | static void 199 | uprobe_listener_on_enter_no_args(GumInvocationListener *listener, GumInvocationContext *ic) 200 | { 201 | struct Uprobe *hookEntry = gum_invocation_context_get_listener_function_data(ic); 202 | Function0Arg function = hookEntry->uprobeInterface->inFunc; 203 | 204 | function(hookEntry->uprobeInterface->data); 205 | } 206 | 207 | 208 | static void 209 | uprobe_listener_on_enter_has_args(GumInvocationListener *listener, GumInvocationContext *ic) 210 | { 211 | struct Uprobe *hookEntry = gum_invocation_context_get_listener_function_data(ic); 212 | 213 | switch (hookEntry->uprobeInterface->numArgs) 214 | { 215 | case 1: 216 | { 217 | Function1Arg function = hookEntry->uprobeInterface->inFunc; 218 | 219 | function(hookEntry->uprobeInterface->data, gum_invocation_context_get_nth_argument(ic, 0)); 220 | break; 221 | } 222 | case 2: 223 | { 224 | Function2Arg function = hookEntry->uprobeInterface->inFunc; 225 | 226 | function(hookEntry->uprobeInterface->data, 227 | gum_invocation_context_get_nth_argument(ic, 0), 228 | gum_invocation_context_get_nth_argument(ic, 1)); 229 | break; 230 | } 231 | case 3: 232 | { 233 | Function3Arg function = hookEntry->uprobeInterface->inFunc; 234 | 235 | function(hookEntry->uprobeInterface->data, 236 | gum_invocation_context_get_nth_argument(ic, 0), 237 | gum_invocation_context_get_nth_argument(ic, 1), 238 | gum_invocation_context_get_nth_argument(ic, 2)); 239 | break; 240 | } 241 
| case 4: 242 | { 243 | Function4Arg function = hookEntry->uprobeInterface->inFunc; 244 | 245 | function(hookEntry->uprobeInterface->data, 246 | gum_invocation_context_get_nth_argument(ic, 0), 247 | gum_invocation_context_get_nth_argument(ic, 1), 248 | gum_invocation_context_get_nth_argument(ic, 2), 249 | gum_invocation_context_get_nth_argument(ic, 3)); 250 | break; 251 | } 252 | case 5: 253 | { 254 | Function5Arg function = hookEntry->uprobeInterface->inFunc; 255 | 256 | function(hookEntry->uprobeInterface->data, 257 | gum_invocation_context_get_nth_argument(ic, 0), 258 | gum_invocation_context_get_nth_argument(ic, 1), 259 | gum_invocation_context_get_nth_argument(ic, 2), 260 | gum_invocation_context_get_nth_argument(ic, 3), 261 | gum_invocation_context_get_nth_argument(ic, 4)); 262 | break; 263 | } 264 | case 6: 265 | { 266 | Function6Arg function = hookEntry->uprobeInterface->inFunc; 267 | 268 | function(hookEntry->uprobeInterface->data, 269 | gum_invocation_context_get_nth_argument(ic, 0), 270 | gum_invocation_context_get_nth_argument(ic, 1), 271 | gum_invocation_context_get_nth_argument(ic, 2), 272 | gum_invocation_context_get_nth_argument(ic, 3), 273 | gum_invocation_context_get_nth_argument(ic, 4), 274 | gum_invocation_context_get_nth_argument(ic, 5)); 275 | break; 276 | } 277 | case 7: 278 | { 279 | Function7Arg function = hookEntry->uprobeInterface->inFunc; 280 | 281 | function(hookEntry->uprobeInterface->data, 282 | gum_invocation_context_get_nth_argument(ic, 0), 283 | gum_invocation_context_get_nth_argument(ic, 1), 284 | gum_invocation_context_get_nth_argument(ic, 2), 285 | gum_invocation_context_get_nth_argument(ic, 3), 286 | gum_invocation_context_get_nth_argument(ic, 4), 287 | gum_invocation_context_get_nth_argument(ic, 5), 288 | gum_invocation_context_get_nth_argument(ic, 6)); 289 | break; 290 | } 291 | case 8: 292 | { 293 | Function8Arg function = hookEntry->uprobeInterface->inFunc; 294 | 295 | function(hookEntry->uprobeInterface->data, 296 | 
gum_invocation_context_get_nth_argument(ic, 0), 297 | gum_invocation_context_get_nth_argument(ic, 1), 298 | gum_invocation_context_get_nth_argument(ic, 2), 299 | gum_invocation_context_get_nth_argument(ic, 3), 300 | gum_invocation_context_get_nth_argument(ic, 4), 301 | gum_invocation_context_get_nth_argument(ic, 5), 302 | gum_invocation_context_get_nth_argument(ic, 6), 303 | gum_invocation_context_get_nth_argument(ic, 7)); 304 | break; 305 | } 306 | default: 307 | break; 308 | } 309 | } 310 | 311 | 312 | static void 313 | uprobe_listener_on_leave(GumInvocationListener *listener, GumInvocationContext *ic) 314 | { 315 | struct Uprobe *hookEntry = gum_invocation_context_get_listener_function_data(ic); 316 | 317 | if (unlikely(hookEntry->uprobeInterface->needRetVal)) 318 | { 319 | Function1Arg function = hookEntry->uprobeInterface->retFunc; 320 | 321 | function(hookEntry->uprobeInterface->data, gum_invocation_context_get_return_value(ic)); 322 | } 323 | else 324 | { 325 | Function0Arg function = hookEntry->uprobeInterface->retFunc; 326 | 327 | function(hookEntry->uprobeInterface->data); 328 | } 329 | } 330 | 331 | 332 | static void 333 | uprobe_listener_no_args_class_init(UprobeListenerNoArgsClass *klass) 334 | { 335 | (void) UPROBE_IS_LISTENERNOARGS; 336 | } 337 | 338 | 339 | static void 340 | uprobe_listener_no_args_iface_init(gpointer g_iface, gpointer iface_data) 341 | { 342 | GumInvocationListenerInterface *iface = (GumInvocationListenerInterface *) g_iface; 343 | 344 | iface->on_enter = uprobe_listener_on_enter_no_args; 345 | iface->on_leave = uprobe_listener_on_leave; 346 | } 347 | 348 | 349 | static void 350 | uprobe_listener_no_args_init(UprobeListenerNoArgs *self) 351 | { 352 | } 353 | 354 | 355 | static void 356 | uprobe_listener_has_args_class_init(UprobeListenerHasArgsClass *klass) 357 | { 358 | (void) UPROBE_IS_LISTENERHASARGS; 359 | } 360 | 361 | 362 | static void 363 | uprobe_listener_has_args_iface_init(gpointer g_iface, gpointer iface_data) 364 | { 
365 | GumInvocationListenerInterface *iface = (GumInvocationListenerInterface *) g_iface; 366 | 367 | iface->on_enter = uprobe_listener_on_enter_has_args; 368 | iface->on_leave = uprobe_listener_on_leave; 369 | } 370 | 371 | 372 | static void 373 | uprobe_listener_has_args_init(UprobeListenerHasArgs *self) 374 | { 375 | } 376 | -------------------------------------------------------------------------------- /src/trace_lock_on_buffers.c: -------------------------------------------------------------------------------- 1 | #include "postgres.h" 2 | #include 3 | #include "miscadmin.h" 4 | #include "utils/hsearch.h" 5 | #include "utils/relcache.h" 6 | #include "utils/rel.h" 7 | #include "utils/lsyscache.h" 8 | #if PG_MAJORVERSION_NUM > 15 9 | #include "utils/relfilenumbermap.h" 10 | #else 11 | #include "utils/relfilenodemap.h" 12 | #endif 13 | #include "commands/dbcommands.h" 14 | #include "commands/tablespace.h" 15 | #include "storage/buf_internals.h" 16 | #include "storage/lwlock.h" 17 | 18 | #include "trace_wait_events.h" 19 | #include "trace_session.h" 20 | #include "trace_file.h" 21 | #include "list.h" 22 | 23 | #include "trace_lock_on_buffers.h" 24 | 25 | typedef struct BufferLWLockStatData 26 | { 27 | BufferTag bufferTag; 28 | int lastCallMode; 29 | 30 | uint64 totalCallsExclusive; 31 | uint64 sleepCountExclusive; 32 | uint64 sleepTimeSumExclusive; 33 | uint64 maxSleepTimeExclusive; 34 | 35 | uint64 totalCallsShared; 36 | uint64 sleepCountShared; 37 | uint64 sleepTimeSumShared; 38 | uint64 maxSleepTimeShared; 39 | 40 | uint64 sleepStart; 41 | } BufferLWLockStatData; 42 | 43 | 44 | 45 | static BufferLWLockStatData *currentTraceLock = NULL; 46 | 47 | 48 | static UprobeList *bufferLockStatStorageList = NULL; 49 | 50 | static MemoryContext traceMemoryContext; 51 | 52 | static bool isLogOnlySleep = false; 53 | 54 | static int BufferLockStatStorageCmp(const void *htab1, const void *htab2); 55 | static HTAB *BufferLWLockStatStorageInit(void); 56 | static void 
BufferLWLockStatStorageDelete(void); 57 | static void BufferLWLockTraceInFunc(void *data, Buffer buffer, int mode); 58 | static void BufferLWLockTraceRetFunc(void); 59 | static void BufferLWLockTraceClean(UprobeAttachInterface *uprobe); 60 | 61 | static void LWLockTraceSleepInFunc(void); 62 | static void LWLockTraceSleepRetFunc(void); 63 | static void LWLockTraceSleepClean(UprobeAttachInterface *uprobe); 64 | 65 | static void LockOnBuffersTraceStartWrite(StringInfo stream); 66 | static void LockOnBuffersTraceEndWrite(StringInfo stream, bool resultIsEmpty); 67 | static bool LockOnBuffersTraceWriteOneLock(StringInfo stream, BufferLWLockStatData *lwlockData); 68 | static bool LockOnBuffersTraceWriteStatInternal(HTAB *statStorage, StringInfo stream, bool shouldClean); 69 | 70 | 71 | static int 72 | BufferLockStatStorageCmp(const void *htab1, const void *htab2) 73 | { 74 | return htab1 == htab2 ? 0 : 1; 75 | } 76 | 77 | 78 | static HTAB * 79 | BufferLWLockStatStorageInit(void) 80 | { 81 | HASHCTL map_info; 82 | 83 | map_info.keysize = sizeof(BufferTag); 84 | map_info.entrysize = sizeof(BufferLWLockStatData); 85 | map_info.match = memcmp; 86 | return hash_create("map for BufferLWLock trace", 128, &map_info, HASH_ELEM | HASH_COMPARE | HASH_BLOBS); 87 | } 88 | 89 | 90 | static void 91 | BufferLWLockStatStorageDelete(void) 92 | { 93 | LIST_FOREACH(bufferLockStatStorageList, it) 94 | { 95 | hash_destroy(it->value); 96 | } 97 | ListFree(bufferLockStatStorageList); 98 | } 99 | 100 | 101 | static void 102 | BufferLWLockTraceInFunc(void *data, Buffer buffer, int mode) 103 | { 104 | bool isFound; 105 | BufferDesc *desc; 106 | HTAB *currentStorage = (HTAB *) LIST_LAST(bufferLockStatStorageList); 107 | 108 | if (BufferIsLocal(buffer) || mode == BUFFER_LOCK_UNLOCK) 109 | return; 110 | Assert(currentStorage != NULL); 111 | desc = GetBufferDescriptor(buffer - 1); 112 | if (CritSectionCount == 0) 113 | { 114 | currentTraceLock = hash_search(currentStorage, &desc->tag, HASH_ENTER, 
&isFound); 115 | } 116 | else 117 | { 118 | currentTraceLock = hash_search(currentStorage, &desc->tag, HASH_FIND, &isFound); 119 | if (currentTraceLock == NULL) 120 | return; 121 | } 122 | currentTraceLock->lastCallMode = mode; 123 | if (!isFound) 124 | { 125 | currentTraceLock->sleepCountExclusive = 0; 126 | currentTraceLock->sleepTimeSumExclusive = 0; 127 | currentTraceLock->totalCallsExclusive = 0; 128 | currentTraceLock->maxSleepTimeExclusive = 0; 129 | 130 | currentTraceLock->sleepCountShared = 0; 131 | currentTraceLock->sleepTimeSumShared = 0; 132 | currentTraceLock->totalCallsShared = 0; 133 | currentTraceLock->maxSleepTimeShared = 0; 134 | } 135 | if (mode == BUFFER_LOCK_SHARE) 136 | { 137 | currentTraceLock->totalCallsShared++; 138 | } 139 | else if (mode == BUFFER_LOCK_EXCLUSIVE) 140 | { 141 | currentTraceLock->totalCallsExclusive++; 142 | } 143 | } 144 | 145 | 146 | static void 147 | BufferLWLockTraceRetFunc(void) 148 | { 149 | currentTraceLock = NULL; 150 | } 151 | 152 | 153 | static void 154 | BufferLWLockTraceClean(UprobeAttachInterface *uprobe) 155 | { 156 | pfree(uprobe); 157 | BufferLWLockStatStorageDelete(); 158 | } 159 | 160 | 161 | static void 162 | LWLockTraceSleepInFunc(void) 163 | { 164 | struct timespec time; 165 | uint64 timeNano; 166 | 167 | clock_gettime(CLOCK_MONOTONIC, &time); 168 | timeNano = time.tv_sec * 1000000000L + time.tv_nsec; 169 | SignalWaitEventStart(timeNano); 170 | 171 | if (!currentTraceLock) 172 | return; 173 | 174 | if (currentTraceLock->lastCallMode == LW_SHARED) 175 | currentTraceLock->sleepCountShared++; 176 | else 177 | currentTraceLock->sleepCountExclusive++; 178 | 179 | currentTraceLock->sleepStart = timeNano; 180 | } 181 | 182 | 183 | static void 184 | LWLockTraceSleepRetFunc(void) 185 | { 186 | struct timespec time; 187 | uint64 timeNano; 188 | uint64 sleepTime; 189 | 190 | clock_gettime(CLOCK_MONOTONIC, &time); 191 | timeNano = time.tv_sec * 1000000000L + time.tv_nsec; 192 | SignalWaitEventEnd(timeNano); 193 | 
194 | if (!currentTraceLock) 195 | return; 196 | 197 | sleepTime = timeNano - currentTraceLock->sleepStart; 198 | if (currentTraceLock->lastCallMode == LW_SHARED) 199 | { 200 | currentTraceLock->sleepTimeSumShared += sleepTime; 201 | if (sleepTime > currentTraceLock->maxSleepTimeShared) 202 | currentTraceLock->maxSleepTimeShared = sleepTime; 203 | } 204 | else 205 | { 206 | currentTraceLock->sleepTimeSumExclusive += sleepTime; 207 | if (sleepTime > currentTraceLock->maxSleepTimeExclusive) 208 | currentTraceLock->maxSleepTimeExclusive = sleepTime; 209 | } 210 | } 211 | 212 | 213 | static void 214 | LWLockTraceSleepClean(UprobeAttachInterface *uprobe) 215 | { 216 | pfree(uprobe); 217 | } 218 | 219 | 220 | /* return 2 Uprobes to attach in resUrpobesToAttach array */ 221 | void 222 | LockOnBuffersUprobesGet(MemoryContext context, UprobeAttachInterface **resUrpobesToAttach, bool shouldLogOnlySleep) 223 | { 224 | UprobeAttachInterface *uprobe = palloc0(sizeof(UprobeAttachInterface)); 225 | 226 | uprobe->cleanFunc = BufferLWLockTraceClean; 227 | uprobe->inFunc = BufferLWLockTraceInFunc; 228 | uprobe->retFunc = BufferLWLockTraceRetFunc; 229 | uprobe->numArgs = 2; 230 | uprobe->targetSymbol = "LockBuffer"; 231 | 232 | resUrpobesToAttach[0] = uprobe; 233 | 234 | uprobe = palloc0(sizeof(UprobeAttachInterface)); 235 | uprobe->cleanFunc = LWLockTraceSleepClean; 236 | uprobe->inFunc = LWLockTraceSleepInFunc; 237 | uprobe->retFunc = LWLockTraceSleepRetFunc; 238 | uprobe->targetSymbol = "PGSemaphoreLock"; 239 | 240 | resUrpobesToAttach[1] = uprobe; 241 | ListInit(&bufferLockStatStorageList, BufferLockStatStorageCmp, context); 242 | ListAdd(bufferLockStatStorageList, BufferLWLockStatStorageInit()); 243 | traceMemoryContext = context; 244 | isLogOnlySleep = shouldLogOnlySleep; 245 | } 246 | 247 | 248 | static void 249 | LockOnBuffersTraceStartWrite(StringInfo stream) 250 | { 251 | if (writeMode == JSON_WRITE_MODE) 252 | appendStringInfo(stream, "[\n"); 253 | } 254 | 255 | 256 | 
static void 257 | LockOnBuffersTraceEndWrite(StringInfo stream, bool resultIsEmpty) 258 | { 259 | if (writeMode == JSON_WRITE_MODE && !resultIsEmpty) 260 | { 261 | /* delete last ',' in array */ 262 | stream->data[stream->len - 2] = ' '; 263 | appendStringInfo(stream, "]\n"); 264 | } 265 | } 266 | 267 | 268 | /* returns true if lwlock stat was written */ 269 | static bool 270 | LockOnBuffersTraceWriteOneLock(StringInfo stream, BufferLWLockStatData *lwlockData) 271 | { 272 | Oid relIdForTag; 273 | Relation relForTag; 274 | char *spcName; 275 | char *dbName; 276 | char *namespaceName; 277 | 278 | #if PG_MAJORVERSION_NUM > 15 279 | relIdForTag = RelidByRelfilenumber(lwlockData->bufferTag.spcOid, lwlockData->bufferTag.relNumber); 280 | #else 281 | relIdForTag = RelidByRelfilenode(lwlockData->bufferTag.rnode.spcNode, lwlockData->bufferTag.rnode.relNode); 282 | #endif 283 | if (InvalidOid == relIdForTag) 284 | return false; 285 | relForTag = RelationIdGetRelation(relIdForTag); 286 | if (relForTag == NULL) 287 | return false; 288 | namespaceName = get_namespace_name_or_temp(relForTag->rd_rel->relnamespace); 289 | #if PG_MAJORVERSION_NUM > 15 290 | dbName = get_database_name(lwlockData->bufferTag.dbOid); 291 | spcName = get_tablespace_name(lwlockData->bufferTag.spcOid); 292 | #else 293 | dbName = get_database_name(lwlockData->bufferTag.rnode.dbNode); 294 | spcName = get_tablespace_name(lwlockData->bufferTag.rnode.spcNode); 295 | #endif 296 | if (writeMode == JSON_WRITE_MODE) 297 | { 298 | appendStringInfo(stream, 299 | " {\n" 300 | " \"bufferTag\": {\n" 301 | " \"spcOid\": %u,\n" 302 | " \"spcName\": \"%s\", \n" 303 | " \"dbOid\": %u,\n" 304 | " \"dbName\": \"%s\",\n" 305 | " \"relNumber\": %u,\n" 306 | " \"relName\": \"%s.%s\",\n" 307 | " \"relKind\": \"%c\",\n" 308 | " \"forkName\": \"%s\",\n" 309 | " \"blockNumber\": %u\n" 310 | " },\n" 311 | " \"exclusive\": {\n" 312 | " \"totalCalls\": %lu,\n" 313 | " \"sleepCount\": %lu,\n" 314 | " \"sleepTimeSum\": \"%lu 
nanosec\",\n" 315 | " \"maxSleepTime\": \"%lu nanosec\"\n" 316 | " },\n" 317 | " \"shared\": {\n" 318 | " \"totalCalls\": %lu,\n" 319 | " \"sleepCount\": %lu,\n" 320 | " \"sleepTimeSum\": \"%lu nanosec\",\n" 321 | " \"maxSleepTime\": \"%lu nanosec\"\n" 322 | " }\n" 323 | " },\n", 324 | #if PG_MAJORVERSION_NUM > 15 325 | lwlockData->bufferTag.spcOid, 326 | spcName, 327 | lwlockData->bufferTag.dbOid, 328 | dbName, 329 | lwlockData->bufferTag.relNumber, 330 | #else 331 | lwlockData->bufferTag.rnode.spcNode, 332 | spcName, 333 | lwlockData->bufferTag.rnode.dbNode, 334 | dbName, 335 | lwlockData->bufferTag.rnode.spcNode, 336 | #endif 337 | namespaceName, 338 | RelationGetRelationName(relForTag), 339 | relForTag->rd_rel->relkind, 340 | forkNames[lwlockData->bufferTag.forkNum], 341 | lwlockData->bufferTag.blockNum, 342 | lwlockData->totalCallsExclusive, 343 | lwlockData->sleepCountExclusive, 344 | lwlockData->sleepTimeSumExclusive, 345 | lwlockData->maxSleepTimeExclusive, 346 | lwlockData->totalCallsShared, 347 | lwlockData->sleepCountShared, 348 | lwlockData->sleepTimeSumShared, 349 | lwlockData->maxSleepTimeShared 350 | ); 351 | } 352 | else 353 | { 354 | appendStringInfo(stream, 355 | "BufferTag: " 356 | "spcOid=%u spcName=%s " 357 | "dbOid=%u dbName=%s " 358 | "relNumber=%u relName=%s.%s " 359 | "relKind=%c forkName=%s " 360 | "blockNumber=%u\n" 361 | "Exclusive: " 362 | "totalCalls=%lu sleepCount=%lu " 363 | "sleepTimeSum=%lu nanosec maxSleepTime=%lu nanosec\n" 364 | "Shared: " 365 | "totalCalls=%lu sleepCount=%lu " 366 | "sleepTimeSum=%lu nanosec maxSleepTime=%lu nanosec\n\n", 367 | #if PG_MAJORVERSION_NUM > 15 368 | lwlockData->bufferTag.spcOid, 369 | spcName, 370 | lwlockData->bufferTag.dbOid, 371 | dbName, 372 | lwlockData->bufferTag.relNumber, 373 | #else 374 | lwlockData->bufferTag.rnode.spcNode, 375 | spcName, 376 | lwlockData->bufferTag.rnode.dbNode, 377 | dbName, 378 | lwlockData->bufferTag.rnode.spcNode, 379 | #endif 380 | namespaceName, 381 | 
RelationGetRelationName(relForTag), 382 | relForTag->rd_rel->relkind, 383 | forkNames[lwlockData->bufferTag.forkNum], 384 | lwlockData->bufferTag.blockNum, 385 | lwlockData->totalCallsExclusive, 386 | lwlockData->sleepCountExclusive, 387 | lwlockData->sleepTimeSumExclusive, 388 | lwlockData->maxSleepTimeExclusive, 389 | lwlockData->totalCallsShared, 390 | lwlockData->sleepCountShared, 391 | lwlockData->sleepTimeSumShared, 392 | lwlockData->maxSleepTimeShared 393 | ); 394 | } 395 | 396 | RelationClose(relForTag); 397 | if (namespaceName) 398 | pfree(namespaceName); 399 | if (dbName) 400 | pfree(dbName); 401 | if (spcName) 402 | pfree(spcName); 403 | return true; 404 | } 405 | 406 | 407 | static bool 408 | LockOnBuffersTraceWriteStatInternal(HTAB *statStorage, StringInfo stream, bool shouldClean) 409 | { 410 | HASH_SEQ_STATUS mapIterator; 411 | BufferLWLockStatData *mapEntry; 412 | bool resultIsEmpty = true; 413 | 414 | if (statStorage == NULL) 415 | return !resultIsEmpty; 416 | 417 | 418 | LockOnBuffersTraceStartWrite(stream); 419 | hash_seq_init(&mapIterator, statStorage); 420 | mapEntry = (BufferLWLockStatData *) hash_seq_search(&mapIterator); 421 | while (mapEntry) 422 | { 423 | if (isLogOnlySleep && !mapEntry->sleepCountExclusive && !mapEntry->sleepCountShared) 424 | { 425 | mapEntry = (BufferLWLockStatData *) hash_seq_search(&mapIterator); 426 | continue; 427 | } 428 | 429 | resultIsEmpty = !LockOnBuffersTraceWriteOneLock(stream, mapEntry); 430 | 431 | mapEntry = (BufferLWLockStatData *) hash_seq_search(&mapIterator); 432 | } 433 | 434 | LockOnBuffersTraceEndWrite(stream, resultIsEmpty); 435 | 436 | if (shouldClean) 437 | { 438 | hash_destroy(ListPopLast(bufferLockStatStorageList)); 439 | ListAdd(bufferLockStatStorageList, BufferLWLockStatStorageInit()); 440 | } 441 | return !resultIsEmpty; 442 | } 443 | 444 | 445 | bool 446 | LockOnBuffersTraceWriteStat(StringInfo stream, bool shouldClean) 447 | { 448 | return LockOnBuffersTraceWriteStatInternal( 449 | (HTAB 
*) LIST_LAST(bufferLockStatStorageList), stream, shouldClean); 450 | } 451 | 452 | 453 | void 454 | LockOnBuffersTraceStatPush(void) 455 | { 456 | ListAdd(bufferLockStatStorageList, BufferLWLockStatStorageInit()); 457 | } 458 | 459 | void 460 | LockOnBuffersTraceStatPop(void) 461 | { 462 | hash_destroy(ListPopLast(bufferLockStatStorageList)); 463 | } 464 | 465 | 466 | HTAB * 467 | LockOnBuffersTraceStatPopAndGet(void) 468 | { 469 | return (HTAB *) ListPopLast(bufferLockStatStorageList); 470 | } 471 | 472 | 473 | void 474 | LockOnBuffersTraceWriteStatWithName(HTAB *data, const char *shortName) 475 | { 476 | MemoryContext old; 477 | StringInfoData str; 478 | bool hasLWLockStat; 479 | 480 | old = MemoryContextSwitchTo(traceMemoryContext); 481 | initStringInfo(&str); 482 | hasLWLockStat = LockOnBuffersTraceWriteStatInternal(data, &str, false); 483 | if (hasLWLockStat) 484 | { 485 | if (writeMode == TEXT_WRITE_MODE) 486 | TracePrintf("TRACE LWLOCK. %s: %s", shortName, str.data); 487 | else 488 | TracePrintf( 489 | "\"%s\": %s,\n", 490 | shortName, 491 | str.data 492 | ); 493 | } 494 | pfree(str.data); 495 | MemoryContextSwitchTo(old); 496 | } 497 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | - [pg_uprobe](#pg_uprobe) 3 | - [Concept](#concept) 4 | - [Session Tracing](#session-tracing) 5 | - [Profiling PostgreSQL Functions](#profiling-postgresql-functions) 6 | - [Limitations](#limitations) 7 | - [Session Tracing](#session-tracing-1) 8 | - [Profiling PostgreSQL Functions](#profiling-postgresql-functions-1) 9 | - [Installation](#installation) 10 | - [Build](#build) 11 | - [Configuration](#configuration) 12 | - [Creating the Extension](#creating-the-extension) 13 | - [Tests](#tests) 14 | - [Settings](#settings) 15 | - [Usage](#usage) 16 | - [Usage Examples](#usage-examples) 17 | - [Code branching model](#code-branching-model) 18 | 19 | #
pg_uprobe 20 | A PostgreSQL extension designed for tracing and analyzing queries executed within a session. This extension allows capturing and logging information about queries running inside a session. The collected information can reveal the heaviest operations and the most heavily consumed resources during the execution of specific SQL queries. 21 | 22 | For advanced users familiar with PostgreSQL's internal architecture, there is an option to set dynamic probes on C functions to examine the internals of the DBMS. 23 | 24 | ## Concept 25 | 26 | Under the hood, pg_uprobe utilizes one of the best code analysis tools - [Frida](https://frida.re) and the [Frida Gum](https://github.com/frida/frida-gum) library, which is part of the Frida ecosystem and provides a low-level API for dynamic code injection. 27 | 28 | Using the extension does not require patching or modifying PostgreSQL's source code. The Frida toolkit allows dynamically injecting the necessary code into a running PostgreSQL instance. 29 | 30 | ### Session Tracing 31 | 32 | Once a connection to the DBMS is established, the extension can be used to enable tracing of all queries executed within the session. Tracing can be enabled for your session using the start_session_trace() function, or for other sessions, such as those created by an application. 33 | 34 | The collected information can reveal the following: 35 | 36 | - Query text 37 | - Time spent parsing the query 38 | - Query plan 39 | - Query plan type (generic, custom) 40 | - Time spent planning the query 41 | - Time spent executing the entire query 42 | - Time spent executing each plan node 43 | - Time spent waiting for events (wait events). For example: file reads, lock waits 44 | - Shared memory locks acquired during query execution 45 | 46 | This information is collected for every query executed during tracing.
47 | 48 | ### Profiling PostgreSQL Functions 49 | 50 | If you are familiar with PostgreSQL's source code, our extension allows setting dynamic probes on some (see the "Limitations" section) C functions inside PostgreSQL core. Functions can be profiled either for your own session or for all sessions created after the probe is set. We have prepared several types of probes that can be installed. 51 | 52 | Probe types: 53 | - TIME - Measures the time spent executing the function 54 | - HIST - Measures the time spent executing the function and builds a histogram of execution times 55 | - MEM - Measures the change in PostgreSQL memory (MemoryContext) before and after the function execution 56 | 57 | ## Limitations 58 | 59 | Supported PostgreSQL versions: 60 | 61 | - PostgreSQL 15/16/17 62 | - [Postgres Pro Standard 15/16/17](https://postgrespro.ru/products/postgrespro/standard) 63 | - [Postgres Pro Enterprise 15/16/17](https://postgrespro.ru/products/postgrespro/enterprise) 64 | 65 | Supported architectures: 66 | 67 | |Informal name|Name in RPM and Linux kernel|Name in Debian and Astra|Features of hardware platform support| 68 | |------------|------------|------------|------------| 69 | |Intel compatible 64-bit|x86_64|amd64|| 70 | |ARM 64-bit|aarch64|arm64|Not tested, but the Frida library supports this architecture| 71 | 72 | (We are working on supporting more architectures) 73 | 74 | Supported operating systems: 75 | 76 | - Linux. Correct operation is guaranteed on the same versions of Linux distributions that are supported by current versions of PostgresPro. 77 | - FreeBSD 78 | 79 | ### Session Tracing 80 | 81 | When tracing sessions, query execution time may increase. In our measurements, performance drops by ~5%. Therefore, session tracing should not be left enabled for extended periods. This tool is primarily intended for investigating issues, not detecting them.
82 | 83 | ### Profiling PostgreSQL Functions 84 | 85 | Unfortunately, we cannot profile all PostgreSQL functions. A function must meet certain criteria: 86 | - Not Inline 87 | - Not Static 88 | - i.e., the function must be present in the ELF file 89 | 90 | To check whether your desired function is in the ELF file, you can use the following commands (for instance, for Postgres Pro Enterprise 15): 91 | 92 | **objdump**: 93 | ```shell 94 | objdump -T /opt/pgpro/ent-15/bin/postgres 95 | objdump -T /opt/pgpro/ent-15/bin/postgres | awk '{ print $7 }' 96 | ``` 97 | 98 | **readelf**: 99 | ```shell 100 | readelf -s -W /opt/pgpro/ent-15/bin/postgres 101 | readelf -s -W /opt/pgpro/ent-15/bin/postgres | awk '{ print $8 }' 102 | ``` 103 | 104 | **nm**: 105 | ```shell 106 | nm -D --demangle /opt/pgpro/ent-15/bin/postgres 107 | nm -D --demangle /opt/pgpro/ent-15/bin/postgres | awk '{ print $NF }' 108 | ``` 109 | 110 | Where `/opt/pgpro/ent-15/bin/postgres` is the path to the installed Postgres Pro/PostgreSQL binaries. 111 | 112 | ## Installation 113 | 114 | Currently, the extension can only be built manually. There are no prebuilt packages. 115 | 116 | Requirements: 117 | - gcc 118 | - CMake >= 3.15 119 | - python 120 | 121 | The installation process involves the following steps: 122 | 123 | - Download Frida library 124 | - Build the extension itself 125 | 126 | ### Build 127 | 128 | ```shell 129 | git clone https://github.com/postgrespro/pg_uprobe 130 | cd pg_uprobe 131 | make USE_PGXS=1 PG_CONFIG=/opt/pgpro/ent-15/bin/pg_config install 132 | ``` 133 | 134 | Note: `/opt/pgpro/ent-15/bin/pg_config` is the path to the installed pg_config application, which is stored in the same location as PostgreSQL.
135 | 136 | ### Configuration 137 | 138 | In the `$PGDATA/postgresql.conf` file, add the pg_uprobe extension to the list of shared libraries loaded at server startup: 139 | 140 | ```shell 141 | shared_preload_libraries = 'pg_uprobe' # (change requires restart) 142 | ``` 143 | After this, restart the PostgreSQL cluster. 144 | 145 | ### Creating the Extension 146 | 147 | ```sql 148 | postgres=# CREATE EXTENSION pg_uprobe; 149 | ``` 150 | If you need to install it in a different schema, simply create the schema and install the extension there: 151 | 152 | ```sql 153 | postgres=# CREATE SCHEMA uprobe; 154 | postgres=# CREATE EXTENSION pg_uprobe SCHEMA uprobe; 155 | ``` 156 | We recommend installation in a dedicated schema. All objects will be created in the schema specified by the `SCHEMA` clause. If you do not want to specify the schema qualifier when using the extension, consider modifying the `search_path` parameter. 157 | 158 | ## Tests 159 | The tests are written in Python using the testgres framework. To run the tests, you need to install the testgres package for Python and set the PG_CONFIG environment variable to the path to the pg_config executable of your PostgreSQL installation. 160 | Running tests: 161 | 162 | ```shell 163 | make PG_CONFIG=/opt/pgpro/ent-15/bin/pg_config python_tests 164 | ``` 165 | 166 | ## Settings 167 | 168 | - **pg_uprobe.data_dir** - Path to the directory where the session trace results file and function profiling results files are created. Default: `$PGDATA/pg_uprobe` 169 | - **pg_uprobe.trace_file_name** - Name of the file for session trace results. Default: `trace_file.txt` 170 | - **pg_uprobe.trace_file_limit** - Limit in megabytes for the session trace results file. Default: 16 MB 171 | - **pg_uprobe.trace_write_mode** - Output format for session tracing. Supported values: "text", "json". 
Default: json 172 | - **pg_uprobe.trace_lwlocks_for_each_node** - If `true`, LWLock statistics will be reset after each `Executor Node` execution; otherwise, statistics will be reset after the `PortalRun` function completes. Default: `true` 173 | - **pg_uprobe.write_only_sleep_lwlocks_stat** - If true, LWLock statistics will only be written in case of lock waits; otherwise, statistics for all acquired LWLocks will be written. Default: true 174 | 175 | 176 | ## Usage 177 | 178 | Documentation for [session tracing](doc/trace_session.md) 179 | 180 | Documentation for [profiling PostgreSQL functions](doc/profile_func.md) 181 | 182 | ## Usage Examples 183 | 184 | Simple example of [session tracing](doc/example_trace_session.md) 185 | 186 | Simple example of [profiling PostgreSQL functions](doc/example_profile_func.md) 187 | 188 | ## Code branching model 189 | 190 | **GitFlow** is used as the main code branching model in the git repository. 191 | 192 | ==============================<<>>============================== 193 | 194 | # pg_uprobe 195 | 196 | Расширение PostgreSQL, предназначено для трассирования и анализа запросов, выполняемых в рамках сеанса. С помощью этого расширения можно захватывать и регистрировать информацию о запросах, которые были выполнены в процессе мониторинга. Собранную информацию можно детально исследовать, чтобы понять, на какие операции и ресурсы уходит время при выполнении конкретных SQL-запросов. 197 | 198 | Для более продвинутых пользователей, которые знакомы с внутренним устройством PostgreSQL, есть возможность устанавливать динамические пробы в user space и также детально исследовать внутреннюю работу СУБД. 199 | 200 | ## Концепция 201 | 202 | Под капотом pg_uprobe использует один из лучших инструментов для анализа кода - [Frida](https://frida.re), а также библиотеку [Frida Gum](https://github.com/frida/frida-gum), которая является частью экосистемы Frida и предоставляет низкоуровневый API для работы с динамическим внедрением кода. 
203 | 204 | Для использования расширения не требуется применять патчи или каким-либо образом изменять исходный код PostgreSQL. Инструментарий Frida позволяет динамически внедрять необходимый код в уже работающий PostgreSQL. 205 | 206 | ### Трассирование сеансов 207 | 208 | Когда подключение к СУБД уже создано, с помощью расширения можно включить трассирование всех запросов, которые будут выполняться в рамках этого сеанса. Включить трассирование можно как для своего сеанса, используя функцию start_session_trace(), так и для трассирования других сеансов, например сеансов, созданных приложением. 209 | 210 | По собранной информации можно определить следующее: 211 | 212 | - Текст запроса 213 | - Время затраченное на разбор запроса 214 | - План запроса 215 | - Тип плана запроса(generic, custom) 216 | - Время, затраченное на планирование запроса 217 | - Время, затраченное на выполнение всего запроса 218 | - Время, затраченное на выполнение каждого узла плана 219 | - Время, затраченное на ожидание событий(wait events). Например: чтение файлов, ожидание блокировки 220 | - Блокировки в разделяемой памяти, захваченные во время выполнения запроса 221 | 222 | Эта информация будет собрана для каждого запроса, выполнявшегося в момент трассирования. 223 | 224 | ### Профилирование функций PostgreSQL 225 | 226 | Если вы знакомы с исходным кодом PostgreSQL, наше расширение позволяет устанавливать динамические пробы в user space на некоторые(см. раздел "Ограничения") функции PostgreSQL. Профилировать функции можно как для своего сеанса, так и для всех сеансов, которые будут созданы после установки пробы. Мы подготовили несколько типов проб, которые можно установить. 
227 | 228 | Типы проб: 229 | - TIME - Измеряет, сколько времени мы затратили на выполнение функции 230 | - HIST - Измеряет, сколько времени мы потратили на выполнение функции, и после строится гистограмма по времени выполнения 231 | - MEM - Измеряет, на сколько изменилась память PostgreSQL(MemoryContext) до выполнения функции и после 232 | 233 | ## Ограничения 234 | 235 | Поддерживаемые версии PostgreSQL: 236 | - PostgreSQL 15/16/17 237 | - [Postgres Pro Standard 15/16/17](https://postgrespro.ru/products/postgrespro/standard) 238 | - [Postgres Pro Enterprise 15/16/17](https://postgrespro.ru/products/postgrespro/enterprise) 239 | 240 | Поддерживаемые архитектуры: 241 | 242 | |Неформальное название|Название в RPM и Linux kernel|Название в Debian и Astra|Особенности поддержки аппаратных платформ| 243 | |------------|------------|------------|------------| 244 | |Интел-совместимые 64-бит|x86_64|amd64|| 245 | |ARM 64-битные|aarch64|arm64|Не тестировалось, но библиотека Frida поддерживает данную архитектуру| 246 | 247 | (Мы работаем над поддержкой большего количества архитектур) 248 | 249 | Поддерживаемые операционные системы: 250 | - Linux. Корректная работа гарантируется на тех же версиях дистрибутивов Linux, которые поддерживаются актуальными версиями PostgresPro. 251 | - FreeBSD 252 | 253 | ### Трассирование сеансов 254 | При трассировании сеансов время выполнения запросов может увеличиться. В наших измерениях скорость выполнения падает на ~5%. Поэтому не стоит оставлять трассирование сеансов на длительное время. Этот инструмент предназначен, в первую очередь, для исследования проблемы, а не её обнаружения. 255 | 256 | ### Профилирование функций PostgreSQL 257 | К сожалению, мы не можем профилировать все функции PostgreSQL.
Функция должна обладать рядом свойств: 258 | 259 | - Не Inline 260 | - Не Static 261 | - Другими словами, функция должна находиться в ELF файле 262 | 263 | Чтобы проверить, находится ли необходимая вам функция в ELF файле, можно воспользоваться следующими командами: 264 | 265 | **objdump**: 266 | ```shell 267 | objdump -T /opt/pgpro/ent-15/bin/postgres 268 | objdump -T /opt/pgpro/ent-15/bin/postgres | awk '{ print $7 }' 269 | ``` 270 | 271 | **readelf**: 272 | ```shell 273 | readelf -s -W /opt/pgpro/ent-15/bin/postgres 274 | readelf -s -W /opt/pgpro/ent-15/bin/postgres | awk '{ print $8 }' 275 | ``` 276 | 277 | **nm**: 278 | ```shell 279 | nm -D --demangle /opt/pgpro/ent-15/bin/postgres 280 | nm -D --demangle /opt/pgpro/ent-15/bin/postgres | awk '{ print $NF }' 281 | ``` 282 | Где /opt/pgpro/ent-15/bin/postgres - путь до установленных бинарников PostgreSQL. 283 | 284 | 285 | ## Установка 286 | На данный момент есть возможность установить расширения только собрав его непосредственно на машине где установлена СУБД. 287 | 288 | Требования: 289 | - gcc 290 | - CMake >= 3.15 291 | - python 292 | 293 | В процессе установки будут выполнены следующие шаги: 294 | - Скачиваются библиотека Frida 295 | - Собирается само расширение 296 | 297 | ### Сборка 298 | 299 | ```shell 300 | git clone https://github.com/postgrespro/pg_uprobe 301 | cd pg_uprobe 302 | make USE_PGXS=1 PG_CONFIG=/opt/pgpro/ent-15/bin/pg_config install 303 | ``` 304 | 305 | ### Настройка 306 | 307 | Где `/opt/pgpro/ent-15/bin/pg_config` — путь до установленного приложения pg_config, который хранится там же, где установлен PostgreSQL. 308 | 309 | В файле `$PGDATA/postgresql.conf` необходимо добавить расширение pg_uprobe в список разделяемых библиотек, которые будут загружаться при запуске сервера: 310 | 311 | ```shell 312 | shared_preload_libraries = 'pg_uprobe' # (change requires restart) 313 | ``` 314 | 315 | После этого необходимо перезапустить кластер PostgreSQL. 
316 | 317 | ### Создание расширения 318 | ```sql 319 | postgres=# CREATE EXTENSION pg_uprobe; 320 | ``` 321 | Если необходимо установить в другую схему, просто создайте её, и установите расширение в эту схему: 322 | 323 | ```sql 324 | postgres=# CREATE SCHEMA uprobe; 325 | postgres=# CREATE EXTENSION pg_uprobe SCHEMA uprobe; 326 | ``` 327 | Все объекты будут созданы в схеме, определенной предложением SCHEMA. Рекомендуется установка в выделенную схему где расширение создаст свои собственные функции. Если вы не хотите указывать квалификатор схемы при использовании расширения, рассмотрите возможность изменения параметра search_path. 328 | 329 | ## Способы тестирования 330 | Тесты написаны на языке Python с использованием фреймворка testgres. Для запуска тестов необходимо установить пакет testgres для Python и установить переменную окружения PG_CONFIG в путь до исполняемого файла pg_config вашего установленного PostgreSQL. 331 | Запуск тестов: 332 | 333 | ```shell 334 | make PG_CONFIG=/opt/pgpro/ent-15/bin/pg_config python_tests 335 | ``` 336 | 337 | ## Настройки 338 | 339 | - **pg_uprobe.data_dir** - Путь к каталогу, в котором будет создаваться файл с результатами трассирования сеанса и файлы с результатами профилирования функций. По умолчанию: `$PGDATA/pg_uprobe` 340 | - **pg_uprobe.trace_file_name** - Имя файла для результатов трассирования сеанса. По умолчанию: `trace_file.txt` 341 | - **pg_uprobe.trace_file_limit** - Лимит в мегабайтах для файла с результатами трассирования сеанса. По умолчанию: 16 МБ 342 | - **pg_uprobe.trace_write_mode** - Формат вывода информации для трассирования сеанса. Поддерживаемые значения: "text", "json". По умолчанию: `json` 343 | - **pg_uprobe.trace_lwlocks_for_each_node** - Если `true`, статистика по LWLock будет сбрасываться после выполнения каждого `Executor Node;`, иначе статистика будет сбрасываться после завершения функции `PortalRun`. 
По умолчанию: `true` 344 | - **pg_uprobe.write_only_sleep_lwlocks_stat** - Если `true`, статистика по LWLock будет писаться только в случае ожидания блокировки, иначе будет писаться статистика по всем LWLock, которые были захвачены. По умолчанию: `true` 345 | 346 | ## Использование 347 | 348 | Документация по [трассированию сеансов](doc/trace_session.md) 349 | 350 | Документация по [профилированию функций PostgreSQL](doc/profile_func.md) 351 | 352 | ## Примеры использования 353 | 354 | Простой пример использования [трассирования сеанса](doc/example_trace_session.md) 355 | 356 | Простой пример использования [профилирования функций PostgreSQL](doc/example_profile_func.md) 357 | 358 | ## Модель ветвления кода 359 | 360 | В качестве основной модели ветвления кода в git-репозитории используется **gitFlow** -------------------------------------------------------------------------------- /tests/test_functions_profile.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import re 3 | import random 4 | import os 5 | from decimal import Decimal 6 | from testgres import PostgresNode 7 | from testgres import NodeConnection 8 | from time import sleep 9 | 10 | from utils import node_read_file, test_wrapper 11 | 12 | test_multy_functions = ["PortalStart", "PortalRun", "GetCachedPlan", "index_getnext_tid", 13 | "fopen", "fclose", "tuplesort_performsort", "SerializeSnapshot", 14 | "LockBuffer", "ReleaseCachedPlan", "PushActiveSnapshot", 15 | "pg_parse_query", "CreatePortal", "ExecInitNode", 16 | "ExecAssignExprContext", "index_beginscan", "index_getnext_slot", 17 | "LWLockAcquire", "LWLockRelease", "index_endscan"] 18 | 19 | 20 | def node_has_file(node: PostgresNode, path: str) -> bool: 21 | return os.path.isfile(node.data_dir + path) 22 | 23 | 24 | def set_uprobe(conn: NodeConnection, func: str, type: str, is_shared: bool): 25 | conn.execute(f"select set_uprobe('{func}', '{type}', {is_shared})") 26 | 27 | 28 | def 
dump_uprobe_stat(conn: NodeConnection, func: str, should_clean: bool): 29 | conn.execute(f"select dump_uprobe_stat('{func}', {should_clean})") 30 | 31 | 32 | 33 | def check_hist_on_percent_values(tuples: list[tuple]): 34 | for t in tuples: 35 | assert len(t) == 3, "In each tupple should be 3 lines" 36 | 37 | 38 | is_non_zero = False 39 | percent_summ = Decimal(0.000) 40 | for t in tuples: 41 | if (t[2] != Decimal(0.000)): 42 | is_non_zero = True 43 | percent_summ += t[2] 44 | 45 | assert is_non_zero, "At least one value in third column should be grater than zero for this query" 46 | 47 | assert abs(percent_summ - Decimal(100.0)) < 1.0, "Summ of all percents should be close to close to 100" 48 | 49 | 50 | 51 | def check_TIME_uprobe_file(file_lines: list[str], should_have_calls: bool): 52 | assert len(file_lines) == 1, "In this file should be only one line" 53 | 54 | result_numbers = re.findall(r'\d+', file_lines[0]) 55 | 56 | if should_have_calls: 57 | assert int(result_numbers[0]) > 0, "There must be calls of function" 58 | assert int(result_numbers[1]) > 0, "Time summ of calls can't be 0 or negative" 59 | else: 60 | assert int(result_numbers[0]) >= 0, "Number of calls can't be negative" 61 | assert int(result_numbers[1]) >= 0, "Time summ of calls can't be negative" 62 | 63 | 64 | def TIME_uprobe_get_stat(file_lines: list[str]) -> tuple: 65 | result_numbers = re.findall(r'\d+', file_lines[0]) 66 | return (int(result_numbers[0]), int(result_numbers[1])) 67 | 68 | 69 | def check_HIST_uprobe_file(file_lines: list[str]): 70 | assert len(file_lines) >= 1, "in this file should be at least 1 line" 71 | 72 | for line in file_lines[1:]: 73 | assert line.count(',') == 1, "this is csv file with 2 colums, so only one comma" 74 | line_numbers = line.split(',') 75 | assert Decimal(line_numbers[0]) >= 0, "call time can't be negative" 76 | assert int (line_numbers[1]) > 0, "number of calls must be grater than zero" 77 | 78 | 79 | def check_MEM_uprobe_file(file_lines: list[str]): 
80 | assert len(file_lines) >= 1, "in this file should be more than 1 line" 81 | 82 | for line in file_lines[1:]: 83 | assert line.count(',') == 1, "this is csv file with 2 colums, so only one comma" 84 | line_numbers = line.split(',') 85 | assert int (line_numbers[1]) > 0, "number of calls must be grater than zero" 86 | 87 | 88 | def test_TIME_urpobe_local(node: PostgresNode): 89 | with node.connect("postgres", autocommit=True) as conn: 90 | set_uprobe(conn, 'PortalStart', 'TIME', False) 91 | conn.execute("select * from pg_class") 92 | result = conn.execute("select stat_time_uprobe('PortalStart')")[0][0] 93 | result_numbers = re.findall(r'\d+', result) 94 | assert int(result_numbers[0]) == 2, "There were two queries, so PortalStart should be called 2 times" 95 | assert int(result_numbers[1]) > 0, "Time summ of two call can't be 0 or negative" 96 | 97 | 98 | def test_HIST_uprobe_local(node: PostgresNode): 99 | with node.connect("postgres", autocommit=True) as conn: 100 | set_uprobe(conn, 'PortalStart', 'HIST', False) 101 | conn.execute("select * from pg_class") 102 | 103 | #fist stat_hist_uprobe_test 104 | result = conn.execute("select * from stat_hist_uprobe('PortalStart')") 105 | 106 | assert len(result) == 4, "In first call of stat_hist_urpobe the result should be 4 tuples" 107 | 108 | check_hist_on_percent_values(result) 109 | 110 | #second stat_hist_urpobe test 111 | result = conn.execute("select * from stat_hist_uprobe('PortalStart', 0.0, 100.0, 10.0)") 112 | 113 | assert len(result) == 12, "In this of stat_hist_urpobe the result should be 12 tuples" 114 | 115 | check_hist_on_percent_values(result) 116 | 117 | 118 | def test_TIME_uprobe_shared(node: PostgresNode): 119 | with node.connect("postgres", autocommit=True) as conn: 120 | set_uprobe(conn, 'PortalStart', 'TIME', True) 121 | 122 | node.pgbench_run(time=10, client=10, jobs=5, builtin="select-only") 123 | 124 | dump_uprobe_stat(conn, 'PortalStart', True) 125 | result = node_read_file(node, 
"/pg_uprobe/TIME_PortalStart.txt") 126 | 127 | check_TIME_uprobe_file(result, True) 128 | conn.execute("select delete_uprobe(func, false) from list_uprobes()") 129 | 130 | def test_HIST_uprobe_shared(node: PostgresNode): 131 | with node.connect("postgres", autocommit=True) as conn: 132 | set_uprobe(conn, 'PortalStart', 'HIST', True) 133 | 134 | node.pgbench_run(time=10, client=10, jobs=5, builtin="select-only") 135 | 136 | dump_uprobe_stat(conn, 'PortalStart', True) 137 | result = node_read_file(node, "/pg_uprobe/HIST_PortalStart.txt") 138 | 139 | assert len(result) > 1, "in this file should be more than 1 line" 140 | 141 | check_HIST_uprobe_file(result) 142 | 143 | conn.execute("select delete_uprobe(func, false) from list_uprobes()") 144 | 145 | def test_MEM_uprobe_shared(node: PostgresNode): 146 | with node.connect("postgres", autocommit=True) as conn: 147 | set_uprobe(conn, 'PortalStart', 'MEM', True) 148 | 149 | node.pgbench_run(time=10, client=10, jobs=5, builtin="select-only") 150 | 151 | dump_uprobe_stat(conn, 'PortalStart', True) 152 | result = node_read_file(node, "/pg_uprobe/MEM_PortalStart.txt") 153 | 154 | check_MEM_uprobe_file(result) 155 | 156 | conn.execute("select delete_uprobe(func, false) from list_uprobes()") 157 | 158 | 159 | def test_multy_TIME_local_uprobes(node: PostgresNode): 160 | with node.connect("postgres", autocommit=True) as conn: 161 | for func in test_multy_functions: 162 | set_uprobe(conn, func, "TIME", False) 163 | 164 | assert len(conn.execute("select * from list_uprobes()")) == len(test_multy_functions) 165 | 166 | functions_reorder = test_multy_functions.copy() 167 | random.shuffle(functions_reorder) 168 | 169 | for func in functions_reorder: 170 | result = conn.execute(f"select stat_time_uprobe('{func}')")[0][0] 171 | result_numbers = re.findall(r'\d+', result) 172 | assert len(result_numbers) == 2 173 | 174 | for func in functions_reorder: 175 | conn.execute(f"select delete_uprobe('{func}', false)") 176 | 177 | assert 
len(conn.execute("select * from list_uprobes()")) == 0 178 | 179 | def test_multi_HIST_local_uprobes(node: PostgresNode): 180 | with node.connect("postgres", autocommit=True) as conn: 181 | for func in test_multy_functions: 182 | set_uprobe(conn, func, "HIST", False) 183 | 184 | assert len(conn.execute("select * from list_uprobes()")) == len(test_multy_functions) 185 | 186 | functions_reorder = test_multy_functions.copy() 187 | random.shuffle(functions_reorder) 188 | 189 | for func in functions_reorder: 190 | result = conn.execute(f"select * from stat_hist_uprobe('{func}')") 191 | 192 | assert len(result) > 3 or len(result) == 0, "In result histgram should be empty or have at least 4 lines" 193 | 194 | if len(result) != 0: 195 | check_hist_on_percent_values(result) 196 | 197 | for func in functions_reorder: 198 | result = conn.execute(f"select * from stat_hist_uprobe('{func}', 0.0, 100.0, 10.0)") 199 | 200 | assert len(result) == 12 or len(result) == 0, "In result histgram should be empty or have at least 4 lines" 201 | 202 | if len(result) != 0: 203 | check_hist_on_percent_values(result) 204 | 205 | 206 | for func in functions_reorder: 207 | conn.execute(f"select delete_uprobe('{func}', false)") 208 | 209 | assert len(conn.execute("select * from list_uprobes()")) == 0 210 | 211 | 212 | def test_multi_TIME_shared_uprobes(node: PostgresNode): 213 | with node.connect("postgres", autocommit=True) as conn: 214 | for func in test_multy_functions: 215 | set_uprobe(conn, func, "TIME", True) 216 | 217 | assert len(conn.execute("select * from list_uprobes()")) == len(test_multy_functions) 218 | 219 | node.pgbench_run(time=10, client=10, jobs=5, builtin="select-only") 220 | 221 | functions_reorder = test_multy_functions.copy() 222 | random.shuffle(functions_reorder) 223 | 224 | for func in functions_reorder: 225 | conn.execute(f"select dump_uprobe_stat('{func}', true)") 226 | sleep(0.2) #just in case 227 | file_lines = node_read_file(node, f"/pg_uprobe/TIME_{func}.txt") 228 
| 229 | check_TIME_uprobe_file(file_lines, False) 230 | 231 | for func in functions_reorder: 232 | conn.execute(f"select delete_uprobe('{func}', false)") 233 | 234 | assert len(conn.execute("select * from list_uprobes()")) == 0 235 | 236 | 237 | def test_multi_HIST_shared_uprobes(node: PostgresNode): 238 | with node.connect("postgres", autocommit=True) as conn: 239 | for func in test_multy_functions: 240 | set_uprobe(conn, func, "HIST", True) 241 | 242 | assert len(conn.execute("select * from list_uprobes()")) == len(test_multy_functions) 243 | 244 | node.pgbench_run(time=10, client=10, jobs=5, builtin="select-only") 245 | 246 | functions_reorder = test_multy_functions.copy() 247 | random.shuffle(functions_reorder) 248 | 249 | for func in functions_reorder: 250 | 251 | conn.execute(f"select delete_uprobe('{func}', true)") 252 | sleep(0.2) #just in case 253 | file_lines = node_read_file(node, f"/pg_uprobe/HIST_{func}.txt") 254 | 255 | check_HIST_uprobe_file(file_lines) 256 | 257 | 258 | def test_multi_MEM_shared_uprobe(node: PostgresNode): 259 | with node.connect("postgres", autocommit=True) as conn: 260 | for func in test_multy_functions: 261 | set_uprobe(conn, func, "MEM", True) 262 | 263 | assert len(conn.execute("select * from list_uprobes()")) == len(test_multy_functions) 264 | 265 | node.pgbench_run(time=10, client=10, jobs=5, builtin="select-only") 266 | 267 | functions_reorder = test_multy_functions.copy() 268 | random.shuffle(functions_reorder) 269 | 270 | for func in functions_reorder: 271 | 272 | conn.execute(f"select delete_uprobe('{func}', true)") 273 | sleep(0.2) #just in case 274 | file_lines = node_read_file(node, f"/pg_uprobe/MEM_{func}.txt") 275 | 276 | check_MEM_uprobe_file(file_lines) 277 | 278 | 279 | def test_invalid_uprobe(node: PostgresNode): 280 | with node.connect("postgres", autocommit=True) as conn: 281 | has_error = False 282 | try: 283 | set_uprobe(conn, "12345", "HIST", True) 284 | except: 285 | has_error = True 286 | 287 | assert 
has_error, "there is no sucn function in Postgres(I hope)" 288 | 289 | has_error = False 290 | 291 | try: 292 | set_uprobe(conn, "PortalStart", "12345", True) 293 | except: 294 | has_error = True 295 | 296 | assert has_error, "there is no sucn uprobe type" 297 | 298 | 299 | def test_invalid_stat_args(node: PostgresNode): 300 | with node.connect("postgres", autocommit=True) as conn: 301 | set_uprobe(conn, "PortalStart", "TIME", False) 302 | has_error = False 303 | try: 304 | conn.execute("select * from stat_hist_uprobe('PortalStart')") 305 | except: 306 | has_error = True 307 | 308 | assert has_error, "You can't stat TIME uprobe with stat_hist_uprobe function" 309 | 310 | conn.execute("select delete_uprobe(func, false) from list_uprobes()") 311 | 312 | 313 | set_uprobe(conn, "PortalStart", "HIST", False) 314 | has_error = False 315 | try: 316 | conn.execute("select * from stat_time_uprobe('PortalStart')") 317 | except: 318 | has_error = True 319 | 320 | assert has_error, "You can't stat HIST uprobe with stat_TIME_uprobe function" 321 | 322 | 323 | has_error = False 324 | try: 325 | conn.execute("select * from stat_hist_uprobe('PortalStart', 100.0, 0.0, 10.0)") 326 | except: 327 | has_error = True 328 | 329 | assert has_error, "You must get error if you pass value start grater than stop" 330 | 331 | has_error = False 332 | try: 333 | conn.execute("select * from stat_hist_uprobe('PortalStart', 0.0, 100.0, -10.0)") 334 | except: 335 | has_error = True 336 | 337 | assert has_error, "You must get error if you pass negative step value" 338 | 339 | has_error = False 340 | try: 341 | conn.execute("select * from stat_hist_uprobe('PortalStart', 0.0, 100.0, 0.0)") 342 | except: 343 | has_error = True 344 | 345 | assert has_error, "You must get error if you pass zero step value" 346 | 347 | conn.execute("select delete_uprobe(func, false) from list_uprobes()") 348 | 349 | 350 | def test_invalid_uprobe_name(node: PostgresNode): 351 | with node.connect("postgres", autocommit=True) 
as conn: 352 | has_error = False 353 | try: 354 | conn.execute("select * from stat_hist_uprobe('PortalStart')") 355 | except: 356 | has_error = True 357 | 358 | assert has_error, "You shouldn't have uprobe on PortalStart at this point" 359 | 360 | has_error = False 361 | try: 362 | conn.execute("select * from stat_time_uprobe('PortalStart')") 363 | except: 364 | has_error = True 365 | 366 | assert has_error, "You shouldn't have uprobe on PortalStart at this point" 367 | 368 | set_uprobe(conn, "PortalStart", "TIME", False) 369 | 370 | has_error = False 371 | try: 372 | conn.execute("select * from stat_hist_uprobe('PortalRun')") 373 | except: 374 | has_error = True 375 | 376 | assert has_error, "You shouldn't have uprobe on PortalRun at this point" 377 | 378 | has_error = False 379 | try: 380 | conn.execute("select * from stat_time_uprobe('PortalRun')") 381 | except: 382 | has_error = True 383 | 384 | assert has_error, "You shouldn't have uprobe on PortalRun at this point" 385 | 386 | conn.execute("select delete_uprobe(func, false) from list_uprobes()") 387 | 388 | 389 | def test_delete_donot_save(node: PostgresNode): 390 | with node.connect("postgres", autocommit=True) as conn: 391 | for func in test_multy_functions: 392 | set_uprobe(conn, func, "HIST", True) 393 | 394 | assert len(conn.execute("select * from list_uprobes()")) == len(test_multy_functions) 395 | 396 | node.pgbench_run(time=10, client=10, jobs=5, builtin="select-only") 397 | 398 | functions_reorder = test_multy_functions.copy() 399 | random.shuffle(functions_reorder) 400 | 401 | for func in functions_reorder: 402 | conn.execute(f"select delete_uprobe('{func}', false)") 403 | 404 | assert not node_has_file(node, f"/pg_uprobe/HIST_{func}.txt") 405 | 406 | 407 | def test_stat_clear(node: PostgresNode): 408 | with node.connect("postgres", autocommit=True) as conn: 409 | for func in test_multy_functions: 410 | set_uprobe(conn, func, "TIME", True) 411 | 412 | assert len(conn.execute("select * from 
list_uprobes()")) == len(test_multy_functions) 413 | 414 | node.pgbench_run(time=10, client=10, jobs=5, builtin="select-only") 415 | 416 | functions_reorder = test_multy_functions.copy() 417 | random.shuffle(functions_reorder) 418 | 419 | for func in functions_reorder: 420 | conn.execute(f"select dump_uprobe_stat('{func}', true)") 421 | sleep(0.2) 422 | file_lines = node_read_file(node, f"/pg_uprobe/TIME_{func}.txt") 423 | first_stat = TIME_uprobe_get_stat(file_lines) 424 | 425 | conn.execute(f"select dump_uprobe_stat('{func}', true)") 426 | sleep(0.2) 427 | file_lines = node_read_file(node, f"/pg_uprobe/TIME_{func}.txt") 428 | second_stat = TIME_uprobe_get_stat(file_lines) 429 | 430 | assert first_stat[0] > second_stat[0] or first_stat[0] == 0, "There must be less call in stat after stat drop or there we non in the first place" 431 | 432 | 433 | node.pgbench_run(time=5, client=10, jobs=5, builtin="select-only") 434 | 435 | 436 | prev_stat = [] 437 | post_stat = [] 438 | 439 | for func in functions_reorder: 440 | conn.execute(f"select dump_uprobe_stat('{func}', false)") 441 | sleep(0.2) 442 | file_lines = node_read_file(node, f"/pg_uprobe/TIME_{func}.txt") 443 | prev_stat.append(TIME_uprobe_get_stat(file_lines)) 444 | 445 | node.pgbench_run(time=5, client=10, jobs=5, builtin="select-only") 446 | 447 | 448 | for func in functions_reorder: 449 | conn.execute(f"select dump_uprobe_stat('{func}', false)") 450 | sleep(0.2) 451 | file_lines = node_read_file(node, f"/pg_uprobe/TIME_{func}.txt") 452 | post_stat.append(TIME_uprobe_get_stat(file_lines)) 453 | 454 | for i in range(len(functions_reorder)): 455 | assert prev_stat[i][0] <= post_stat[i][0], "We can't lose any calls if we don't drop stat" 456 | 457 | conn.execute("select delete_uprobe(func, false) from list_uprobes()") 458 | 459 | 460 | def test_data_dir_change(node: PostgresNode): 461 | Path("/tmp/test_session_trace_dir").mkdir(parents=True, exist_ok=True) 462 | node.execute("alter system set pg_uprobe.data_dir to 
'/tmp/test_session_trace_dir'") 463 | node.restart() 464 | 465 | with node.connect("postgres", autocommit=True) as conn: 466 | set_uprobe(conn, 'PortalStart', 'TIME', True) 467 | 468 | node.pgbench_run(time=10, client=10, jobs=5, builtin="select-only") 469 | 470 | dump_uprobe_stat(conn, 'PortalStart', True) 471 | result = open("/tmp/test_session_trace_dir/TIME_PortalStart.txt").readlines() 472 | 473 | check_TIME_uprobe_file(result, True) 474 | conn.execute("select delete_uprobe(func, false) from list_uprobes()") 475 | 476 | 477 | def run_tests(node: PostgresNode): 478 | test_wrapper(node, test_TIME_urpobe_local) 479 | test_wrapper(node, test_HIST_uprobe_local) 480 | 481 | test_wrapper(node, test_TIME_uprobe_shared) 482 | test_wrapper(node, test_HIST_uprobe_shared) 483 | test_wrapper(node, test_MEM_uprobe_shared) 484 | 485 | test_wrapper(node, test_multy_TIME_local_uprobes) 486 | test_wrapper(node, test_multi_HIST_local_uprobes) 487 | 488 | test_wrapper(node, test_multi_TIME_shared_uprobes) 489 | test_wrapper(node, test_multi_HIST_shared_uprobes) 490 | test_wrapper(node, test_multi_MEM_shared_uprobe) 491 | 492 | test_wrapper(node, test_invalid_uprobe) 493 | test_wrapper(node, test_invalid_stat_args) 494 | test_wrapper(node, test_invalid_uprobe_name) 495 | test_wrapper(node, test_delete_donot_save) 496 | test_wrapper(node, test_stat_clear) 497 | 498 | test_wrapper(node, test_data_dir_change) 499 | -------------------------------------------------------------------------------- /tests/test_trace_session.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import re 3 | from decimal import Decimal 4 | from testgres import PostgresNode 5 | from testgres import NodeConnection 6 | from time import sleep 7 | from utils import node_read_file_one_line, test_wrapper, node_get_file_size, load_scripts, extract_numbers_as_strings_from_time_point 8 | import json 9 | 10 | 11 | def start_session_trace(conn: 
# NOTE(review): this dump line starts inside the signature of start_session_trace(), opened on the
# previous dump line. Indentation is reconstructed from whitespace-collapsed text - confirm nesting.
NodeConnection):
    """Run ``select start_session_trace()`` on *conn*."""
    conn.execute("select start_session_trace()")


def start_session_trace_pid(conn: NodeConnection, pid: int):
    """Run ``select start_session_trace(pid)`` on *conn*."""
    conn.execute(f"select start_session_trace({pid})")


def stop_session_trace_pid(conn: NodeConnection, pid: int):
    """Run ``select stop_session_trace(pid)`` on *conn*."""
    conn.execute(f"select stop_session_trace({pid})")


def stop_session_trace(conn: NodeConnection):
    """Run ``select stop_session_trace()`` on *conn*."""
    conn.execute("select stop_session_trace()")


# In the validators below the parameter named `json` is the already-parsed value;
# inside those functions it shadows the imported `json` module.
def validate_explain_field(json):
    """Assert that *json* is a dict containing 'Query Text' and 'Plan'."""
    assert type(json) is dict

    assert 'Query Text' in json

    assert 'Plan' in json


def validate_trace_data_field(json):
    """Assert that *json* is a dict with int 'maxTime', 'totalCalls' and 'totalTimeSum'.

    When 'totalCalls' is positive, both times must be positive and 'maxTime'
    must not exceed 'totalTimeSum'.
    """
    assert type(json) is dict

    assert 'maxTime' in json
    assert type(json['maxTime']) is int

    assert 'totalCalls' in json
    assert type(json['totalCalls']) is int

    assert 'totalTimeSum' in json
    assert type(json['totalTimeSum']) is int

    # NOTE(review): all three asserts are assumed to sit inside this `if`.
    if json['totalCalls'] > 0:
        assert json['maxTime'] > 0
        assert json['totalTimeSum'] > 0
        assert json['maxTime'] <= json['totalTimeSum']



def validate_explain_with_node_stat_field(json):
    """Validate 'traceData' of a plan node and recurse into every entry of 'Plans'."""
    assert type(json) is dict

    assert 'traceData' in json

    validate_trace_data_field(json['traceData'])

    if 'Plans' in json:
        assert type(json['Plans']) is list
        for subplan in json['Plans']:
            validate_explain_with_node_stat_field(subplan)



def validate_nanosec_time_field(json, grater_than_zero = True):
    """Assert that *json* is a str containing exactly one digit run.

    With *grater_than_zero* the number must also be positive.
    """
    assert type(json) is str

    numbers = re.findall(r'\d+', json)

    assert len(numbers) == 1

    if grater_than_zero:
        assert int(numbers[0]) > 0, "All times should be grater than zero"


def validate_wait_event_stat_object(json):
    """Assert the shape of one wait-event entry: 'name', 'count', 'timeSum', 'maxTime'."""
    assert type(json) is dict

    assert 'name' in json

    assert type(json['name']) is str

    assert 'count' in json

    assert type(json['count']) is int

    assert json['count'] > 0
# NOTE(review): this dump line opens inside validate_wait_event_stat_object(), defined on the
# previous dump line. Indentation is reconstructed from whitespace-collapsed text - confirm nesting.

    assert 'timeSum' in json

    validate_nanosec_time_field(json['timeSum'])

    assert 'maxTime' in json

    validate_nanosec_time_field(json['maxTime'])


def validate_wait_event_stat_field(json):
    """Assert that *json* is a list and validate each element as a wait-event entry."""
    assert type(json) is list

    for stat in json:
        validate_wait_event_stat_object(stat)


def validate_buffer_tag_object(json):
    """Assert that *json* is a dict holding the nine buffer-tag keys with the expected types."""
    assert type(json) is dict

    assert "spcOid" in json
    assert type(json['spcOid']) is int

    assert "spcName" in json
    assert type(json['spcName']) is str

    assert "dbOid" in json
    assert (type(json["dbOid"])) is int

    assert "dbName" in json
    assert type(json['dbName']) is str

    assert "relNumber" in json
    assert (type(json["relNumber"])) is int

    assert "relName" in json
    assert type(json['relName']) is str

    assert "relKind" in json
    assert (type(json["relKind"])) is str

    assert "forkName" in json
    assert (type(json["forkName"])) is str

    assert "blockNumber" in json
    assert (type(json["blockNumber"])) is int


def validate_lwlock_stat_calls_object(json):
    """Assert the shape of one per-mode lock counter object.

    The two sleep times are required to be positive only when 'sleepCount'
    is positive.
    """
    assert type(json) is dict

    assert "totalCalls" in json
    assert type(json['totalCalls']) is int

    assert "sleepCount" in json
    assert type(json['sleepCount']) is int

    assert "sleepTimeSum" in json
    validate_nanosec_time_field(json["sleepTimeSum"], json['sleepCount'] > 0)

    assert "maxSleepTime" in json
    validate_nanosec_time_field(json["maxSleepTime"], json['sleepCount'] > 0)



def validate_LWLock_stat_object(json):
    """Validate one lock entry: its 'bufferTag' plus 'exclusive' and 'shared' counters."""
    assert type(json) is dict

    assert "bufferTag" in json
    validate_buffer_tag_object(json["bufferTag"])

    assert "exclusive" in json
    validate_lwlock_stat_calls_object(json["exclusive"])

    assert "shared" in json
# NOTE(review): this dump line opens inside validate_LWLock_stat_object(), defined on the
# previous dump line. Indentation is reconstructed from whitespace-collapsed text - confirm nesting.
    validate_lwlock_stat_calls_object(json["shared"])



def validate_LWLock_stat_field(json):
    """Assert that *json* is a list and validate each element as a lock entry."""
    assert type(json) is list

    for obj in json:
        validate_LWLock_stat_object(obj)


def validate_execution_event_object(json):
    """Validate one execution event.

    An event carrying 'executionEvents' is validated as a whole query trace;
    otherwise it must have a str 'node' and an 'executeTime'.
    """
    assert type(json) is dict

    # Events with their own 'executionEvents' are validated as full query traces.
    if "executionEvents" in json:
        validate_one_query_trace(json)
        return

    assert 'node' in json

    assert type(json['node']) is str

    assert 'executeTime' in json

    validate_nanosec_time_field(json['executeTime'])

    if 'explain' in json:
        assert type(json['explain']) is dict

    if 'LWLockStat' in json:
        validate_LWLock_stat_field(json['LWLockStat'])




def validate_execution_events_filed(json):
    """Assert that *json* is a list and validate each element as an execution event."""
    assert type(json) is list

    for obj in json:
        validate_execution_event_object(obj)


def validate_execution_start_field(json):
    """Assert that the timestamp string splits into seven numeric parts of the expected widths."""
    assert type(json) is str
    # json should look like this "2025:06:18T12:17:31.006"

    numbers = extract_numbers_as_strings_from_time_point(json)

    assert len(numbers) == 7

    assert len(numbers[0]) == 4, "len of year should be 4"
    assert len(numbers[-1]) == 3, "expect only ms part of nanoseconds"

    # Every part between the first and the last must be two characters long.
    for i in range(1, (len(numbers) - 1)):
        assert len(numbers[i]) == 2

def validate_one_query_trace(json):
    """Validate one query trace object.

    An empty dict is accepted. Otherwise 'executionStart', 'explain',
    'executionEvents' and 'executionTime' are required, and every optional
    section that is present is validated as well.
    """
    assert type(json) is dict

    # for cases where last item in json array is empty
    if len(json) == 0:
        return

    if 'parsingTime' in json:
        validate_nanosec_time_field(json['parsingTime'])

    if 'planningTime' in json:
        validate_nanosec_time_field(json['planningTime'])

    assert 'executionStart' in json

    validate_execution_start_field(json['executionStart'])

    assert 'explain' in json

    validate_explain_field(json['explain'])

    assert 'executionEvents' in json

    validate_execution_events_filed(json['executionEvents'])

    assert 'executionTime' in json

    validate_nanosec_time_field(json['executionTime'])

    if 'executorNodeStatInPlan' in json:

        validate_explain_with_node_stat_field(json['executorNodeStatInPlan'])

    if 'waitEventStat' in json:
        validate_wait_event_stat_field(json['waitEventStat'])

    if 'LWLockPlanning' in json:
        validate_LWLock_stat_field(json['LWLockPlanning'])

    if 'LWLockParsing' in json:
        validate_LWLock_stat_field(json['LWLockParsing'])

    if 'locksInsidePortalRun' in json:
        validate_LWLock_stat_field(json['locksInsidePortalRun'])

    if 'locksOutsidePortalRun' in json:
        validate_LWLock_stat_field(json['locksOutsidePortalRun'])


def validate_session_trace_result_simple(json, pid):
    """Assert a trace for *pid* with exactly two 'queries' entries and validate the first one."""
    assert type(json) is dict
    assert 'pid' in json
    assert type(json['pid']) is int
    assert json['pid'] == pid
    assert 'queries' in json
    assert type(json['queries']) is list
    assert len(json['queries']) == 2, "in this trace file should be only 2 queries"

    # Only the first entry is validated here.
    validate_one_query_trace(json['queries'][0])


def validate_each_session_trace_result(json, pid):
    """Assert a trace for *pid* and validate every entry of 'queries'."""
    assert type(json) is dict
    assert 'pid' in json
    assert type(json['pid']) is int
    assert json['pid'] == pid
    assert 'queries' in json
    assert type(json['queries']) is list

    for obj in json['queries']:
        validate_one_query_trace(obj)


def trace_current_session_trace(node: PostgresNode):
    """Trace one query in the current session and validate the resulting trace file."""
    with node.connect("postgres", autocommit=True) as conn:
        start_session_trace(conn)

        conn.execute("select * from pgbench_accounts LIMIT 5")

        stop_session_trace(conn)

        result = node_read_file_one_line(node, f"/pg_uprobe/trace_file.txt_{conn.pid}")

validate_session_trace_result_simple(json.loads(result), conn.pid) 306 | 307 | 308 | def trace_current_session_trace_non_sleep_buffer_locks(node: PostgresNode): 309 | with node.connect("postgres", autocommit=True) as conn: 310 | conn.execute("set pg_uprobe.write_only_sleep_lwlocks_stat to false") 311 | 312 | start_session_trace(conn) 313 | 314 | conn.execute("select * from pgbench_accounts LIMIT 5") 315 | 316 | stop_session_trace(conn) 317 | 318 | result = node_read_file_one_line(node, f"/pg_uprobe/trace_file.txt_{conn.pid}") 319 | 320 | validate_session_trace_result_simple(json.loads(result), conn.pid) 321 | 322 | 323 | def trace_current_session_trace_non_sleep_buffer_locks_for_each_node(node: PostgresNode): 324 | with node.connect("postgres", autocommit=True) as conn: 325 | conn.execute("set pg_uprobe.write_only_sleep_lwlocks_stat to false") 326 | 327 | conn.execute("set pg_uprobe.trace_lwlocks_for_each_node to true") 328 | 329 | start_session_trace(conn) 330 | 331 | conn.execute("select * from pgbench_accounts LIMIT 5") 332 | 333 | stop_session_trace(conn) 334 | 335 | result = node_read_file_one_line(node, f"/pg_uprobe/trace_file.txt_{conn.pid}") 336 | 337 | 338 | validate_session_trace_result_simple(json.loads(result), conn.pid) 339 | 340 | 341 | def trace_current_session_change_data_dir(node: PostgresNode): 342 | Path("/tmp/test_session_trace_dir").mkdir(parents=True, exist_ok=True) 343 | 344 | with node.connect("postgres", autocommit=True) as conn: 345 | 346 | conn.execute("set pg_uprobe.data_dir to '/tmp/test_session_trace_dir'") 347 | 348 | start_session_trace(conn) 349 | 350 | conn.execute("select * from pgbench_accounts LIMIT 5") 351 | 352 | stop_session_trace(conn) 353 | 354 | result = open(f"/tmp/test_session_trace_dir/trace_file.txt_{conn.pid}").read(-1) 355 | 356 | validate_session_trace_result_simple(json.loads(result), conn.pid) 357 | 358 | 359 | def trace_current_session_change_trace_file_name(node: PostgresNode): 360 | with 
node.connect("postgres", autocommit=True) as conn: 361 | 362 | conn.execute("set pg_uprobe.trace_file_name to 'trace'") 363 | 364 | conn.execute("set pg_uprobe.write_only_sleep_lwlocks_stat to false") 365 | 366 | conn.execute("set pg_uprobe.trace_lwlocks_for_each_node to true") 367 | 368 | start_session_trace(conn) 369 | 370 | conn.execute("select * from pgbench_accounts LIMIT 5") 371 | 372 | stop_session_trace(conn) 373 | 374 | result = node_read_file_one_line(node, f"/pg_uprobe/trace_{conn.pid}") 375 | 376 | validate_session_trace_result_simple(json.loads(result), conn.pid) 377 | 378 | 379 | def trace_current_session_check_file_limit(node: PostgresNode): 380 | with node.connect("postgres", autocommit=True) as conn: 381 | 382 | conn.execute("set pg_uprobe.write_only_sleep_lwlocks_stat to false") 383 | 384 | conn.execute("set pg_uprobe.trace_lwlocks_for_each_node to true") 385 | 386 | start_session_trace(conn) 387 | 388 | for i in range(100000): 389 | conn.execute("select * from pgbench_accounts LIMIT 5") 390 | 391 | stop_session_trace(conn) 392 | 393 | result = node_get_file_size(node, f"/pg_uprobe/trace_file.txt_{conn.pid}") 394 | 395 | assert (result/1024/1024) > 16 and (result/1024/1024) < 17, "result file can be a little bigger than 16mb, but can't be too big" 396 | 397 | 398 | def trace_current_session_change_check_file_limit(node: PostgresNode): 399 | with node.connect("postgres", autocommit=True) as conn: 400 | 401 | conn.execute("set pg_uprobe.trace_file_limit to 32") 402 | 403 | conn.execute("set pg_uprobe.write_only_sleep_lwlocks_stat to false") 404 | 405 | conn.execute("set pg_uprobe.trace_lwlocks_for_each_node to true") 406 | 407 | start_session_trace(conn) 408 | 409 | for i in range(100000): 410 | conn.execute("select * from pgbench_accounts LIMIT 5") 411 | 412 | stop_session_trace(conn) 413 | 414 | result = node_get_file_size(node, f"/pg_uprobe/trace_file.txt_{conn.pid}") 415 | 416 | assert (result/1024/1024) > 32 and (result/1024/1024) < 33, "result 
file can be a little bigger than 32mb, but can't be too big" 417 | 418 | 419 | def trace_current_session_write_mod(node: PostgresNode): 420 | with node.connect("postgres", autocommit=True) as conn: 421 | conn.execute("set pg_uprobe.trace_write_mode TO text") 422 | 423 | start_session_trace(conn) 424 | 425 | conn.execute("select * from pgbench_accounts LIMIT 5") 426 | 427 | stop_session_trace(conn) 428 | 429 | result = node_read_file_one_line(node, f"/pg_uprobe/trace_file.txt_{conn.pid}") 430 | 431 | 432 | has_error = False 433 | 434 | try: 435 | validate_session_trace_result_simple(json.loads(result), conn.pid) 436 | except: 437 | has_error = True 438 | 439 | assert has_error, "result should't be json" 440 | 441 | 442 | def trace_current_session_large(node: PostgresNode): 443 | with node.connect("postgres", autocommit=True) as conn: 444 | start_session_trace(conn) 445 | 446 | for sql in load_scripts(): 447 | conn.execute(sql) 448 | 449 | 450 | stop_session_trace(conn) 451 | 452 | result = node_read_file_one_line(node, f"/pg_uprobe/trace_file.txt_{conn.pid}") 453 | 454 | validate_each_session_trace_result(json.loads(result), conn.pid) 455 | 456 | 457 | def trace_session_pid(node: PostgresNode): 458 | with node.connect("postgres", autocommit=True) as conn: 459 | pid:int 460 | with node.connect("postgres", autocommit=True) as node_trace: 461 | pid = node_trace.pid 462 | start_session_trace_pid(conn, pid) 463 | 464 | for sql in load_scripts(): 465 | node_trace.execute(sql) 466 | 467 | 468 | stop_session_trace_pid(conn, pid) 469 | 470 | 471 | sleep(1) 472 | result = node_read_file_one_line(node, f"/pg_uprobe/trace_file.txt_{pid}") 473 | validate_each_session_trace_result(json.loads(result), pid) 474 | 475 | 476 | def trace_session_plpgsql_functions(node: PostgresNode): 477 | with node.connect("postgres", autocommit=True) as conn: 478 | start_session_trace(conn) 479 | 480 | assert conn.execute("select calculate_order_total(2)") == [(Decimal('80000.00'),)] 481 | 
conn.execute("select update_order_status(2, 'SHIPPED')") 482 | conn.execute("select add_product_to_order(1, 2, 100)") 483 | assert conn.execute("select * from get_user_orders(1)") == [(1, Decimal('4050000.00'), 'NEW', 2,)] 484 | conn.execute("select create_user('aboba', 'boba', 'aboba_boba@example.ru', '+79132281337')") 485 | assert conn.execute("select * from get_low_stock_products(10000)") == [(2,"Samsung Galaxy S10",50,40000.00),(1,"iPhone X",100,50000.00), (3,"Xiaomi Redmi Note 8 Pro",200,25000.00)] 486 | assert conn.execute("SELECT process_complete_order(1, ARRAY[2,3], ARRAY[10, 5])") == [(4,)] 487 | 488 | stop_session_trace(conn) 489 | 490 | result = node_read_file_one_line(node, f"/pg_uprobe/trace_file.txt_{conn.pid}") 491 | validate_each_session_trace_result(json.loads(result), conn.pid) 492 | 493 | 494 | def trace_session_plpgsql_functions_exceptions(node: PostgresNode): 495 | with node.connect("postgres", autocommit=True) as conn: 496 | start_session_trace(conn) 497 | 498 | try: 499 | conn.execute("SELECT process_complete_order(1, ARRAY[5,8], ARRAY[10, 5])") 500 | except: 501 | pass 502 | 503 | stop_session_trace(conn) 504 | 505 | result = node_read_file_one_line(node, f"/pg_uprobe/trace_file.txt_{conn.pid}") 506 | validate_each_session_trace_result(json.loads(result), conn.pid) 507 | 508 | 509 | def trace_session_correct_executor_finish(node: PostgresNode): 510 | with node.connect("postgres", autocommit=True) as conn: 511 | start_session_trace(conn) 512 | 513 | conn.execute("create table mlparted (a int, b int)") 514 | 515 | conn.execute("with ins (a, b, c) as \ 516 | (insert into mlparted (b, a) select s.a, 1 from generate_series(2, 39) s(a) returning tableoid::regclass, *) \ 517 | select a, b, min(c), max(c) from ins group by a, b order by 1;") 518 | 519 | conn.execute("drop table mlparted") 520 | 521 | stop_session_trace(conn) 522 | 523 | result = node_read_file_one_line(node, f"/pg_uprobe/trace_file.txt_{conn.pid}") 524 | 
# NOTE(review): this dump line opens inside the `with` block of trace_session_correct_executor_finish(),
# started on the previous dump line. Indentation is reconstructed from collapsed text - confirm nesting.
        validate_each_session_trace_result(json.loads(result), conn.pid)

def trace_session_fetch(node: PostgresNode):
    """Trace cursor FETCH / MOVE statements inside a rolled-back transaction and validate the trace."""
    with node.connect("postgres", autocommit=True) as conn:
        start_session_trace(conn)

        conn.execute("CREATE TABLE INT8_TBL(q1 int8, q2 int8)")

        # NOTE(review): whitespace inside the literals below was collapsed by the dump (continuation
        # indentation, padding inside the quoted numbers) - restore from the repository if it matters.
        conn.execute("INSERT INTO INT8_TBL VALUES \
            (' 123 ',' 456'), \
            ('123 ','4567890123456789'), \
            ('4567890123456789','123'), \
            (+4567890123456789,'4567890123456789'), \
            ('+4567890123456789','-4567890123456789')")
        conn.execute("begin")
        conn.execute("create function nochange(int) returns int \
            as 'select $1 limit 1' language sql stable")
        conn.execute("declare c cursor for select * from int8_tbl limit nochange(3)")
        conn.execute("fetch all from c")
        conn.execute("move backward all in c")
        conn.execute("fetch all from c")
        conn.execute("rollback")
        stop_session_trace(conn)

        result = node_read_file_one_line(node, f"/pg_uprobe/trace_file.txt_{conn.pid}")
        validate_each_session_trace_result(json.loads(result), conn.pid)

def trace_session_correct_with_jit(node: PostgresNode):
    """Repeat the data-modifying CTE trace with JIT enabled and all JIT cost thresholds set to 0."""
    with node.connect("postgres", autocommit=True) as conn:
        conn.execute("set jit to on")
        conn.execute("set jit_above_cost to 0.0")
        conn.execute("set jit_inline_above_cost to 0.0")
        conn.execute("set jit_optimize_above_cost to 0.0")
        start_session_trace(conn)

        conn.execute("create table mlparted (a int, b int)")
        conn.execute("with ins (a, b, c) as \
            (insert into mlparted (b, a) select s.a, 1 from generate_series(2, 39) s(a) returning tableoid::regclass, *) \
            select a, b, min(c), max(c) from ins group by a, b order by 1;")

        conn.execute("drop table mlparted")

        stop_session_trace(conn)

        result = node_read_file_one_line(node, f"/pg_uprobe/trace_file.txt_{conn.pid}")

validate_each_session_trace_result(json.loads(result), conn.pid) 570 | 571 | def trace_session_correct_fetch_zero_desc(node: PostgresNode): 572 | with node.connect("postgres", autocommit=True) as conn: 573 | start_session_trace(conn) 574 | 575 | conn.execute("CREATE FUNCTION create_temp_tab() RETURNS text \ 576 | LANGUAGE plpgsql AS $$ \ 577 | BEGIN \ 578 | CREATE TEMP TABLE new_table (f1 float); \ 579 | INSERT INTO new_table SELECT invert(0.0); \ 580 | RETURN 'foo'; \ 581 | END $$;") 582 | conn.execute("BEGIN") 583 | conn.execute("DECLARE ctt CURSOR FOR SELECT create_temp_tab()") 584 | conn.execute("SAVEPOINT s1") 585 | try: 586 | conn.execute("FETCH ctt") 587 | except: 588 | pass 589 | conn.execute("ROLLBACK TO s1") 590 | try: 591 | conn.execute("FETCH ctt") 592 | except: 593 | pass 594 | conn.execute("ROLLBACK") 595 | 596 | stop_session_trace(conn) 597 | 598 | result = node_read_file_one_line(node, f"/pg_uprobe/trace_file.txt_{conn.pid}") 599 | validate_each_session_trace_result(json.loads(result), conn.pid) 600 | 601 | def run_tests(node: PostgresNode): 602 | test_wrapper(node, trace_current_session_trace) 603 | test_wrapper(node, trace_current_session_trace_non_sleep_buffer_locks) 604 | test_wrapper(node, trace_current_session_trace_non_sleep_buffer_locks_for_each_node) 605 | 606 | test_wrapper(node, trace_current_session_change_data_dir) 607 | test_wrapper(node, trace_current_session_change_trace_file_name) 608 | test_wrapper(node, trace_current_session_check_file_limit) 609 | test_wrapper(node, trace_current_session_change_check_file_limit) 610 | test_wrapper(node, trace_current_session_write_mod) 611 | 612 | test_wrapper(node, trace_current_session_large) 613 | test_wrapper(node, trace_session_pid) 614 | test_wrapper(node, trace_session_plpgsql_functions) 615 | test_wrapper(node, trace_session_plpgsql_functions_exceptions) 616 | test_wrapper(node, trace_session_correct_executor_finish) 617 | test_wrapper(node, trace_session_fetch) 618 | test_wrapper(node, 
trace_session_correct_with_jit) 619 | test_wrapper(node, trace_session_correct_fetch_zero_desc) --------------------------------------------------------------------------------