├── paper_imp ├── cfg │ ├── __init__.py │ ├── tools.py │ ├── testbench.sh │ ├── setting.py │ ├── msg_draw.py │ ├── _cfg_rt_eval.py │ ├── dijkstra_milestone_placement.py │ ├── _cfg_graph.py │ ├── _cfg_solver.py │ ├── basic_block.py │ ├── slack.py │ ├── msg_binarize.py │ ├── decorator.py │ ├── tprofile.py │ ├── tracer.py │ └── lean_cfg.py ├── demo │ ├── application │ │ ├── mser │ │ ├── sift │ │ ├── disparity │ │ ├── tracking │ │ └── texture_synthesis │ └── milestone_graphs │ │ ├── README.md │ │ ├── tmg │ │ ├── mser.ttmsg │ │ ├── sift.ttmsg │ │ ├── disparity.ttmsg │ │ ├── texture.ttmsg │ │ └── tracking.ttmsg │ │ └── mg │ │ ├── sift.merge.dot │ │ ├── tracking.reduced.dot │ │ ├── texture_synthesis.tmsg.dot │ │ ├── texture_synthesis.create_texture.tmsg.dot │ │ ├── sift.sift.tmsg.dot │ │ ├── sift.gaussianss.tmsg.dot │ │ ├── sift.tmsg.dot │ │ └── disparity.tmsg.dot ├── tracee │ ├── include │ │ ├── pmu_cc.h │ │ ├── buffer.h │ │ ├── cs_config.h │ │ ├── argparse.h │ │ ├── pmu_event.h │ │ ├── zcu_cs.h │ │ └── cs_pmu.h │ ├── README.md │ ├── Makefile │ ├── src │ │ ├── cs_soc.c │ │ ├── buffer.c │ │ ├── argparse.c │ │ ├── zcu_cs.c │ │ └── cs_config.c │ └── main │ │ ├── bench.cpp │ │ ├── pmcc.c │ │ ├── start_mp.cpp │ │ └── start.cpp ├── tracer │ ├── src │ │ ├── platform_config.h │ │ ├── handlers.h │ │ ├── handlers.c │ │ ├── zcu_cs.h │ │ ├── platform.h │ │ ├── trace.h │ │ ├── etm.h │ │ ├── platform.c │ │ ├── parser.c │ │ └── etm.c │ └── README.md └── trc_parser_offline │ ├── Makefile │ ├── headers │ ├── handlers.h │ └── trace.h │ └── src │ ├── ctrace.c │ └── handlers.c ├── csc ├── util │ ├── devmem │ └── dbg.sh ├── include │ ├── pmu_cc.h │ ├── common.h │ ├── pmu_event.h │ ├── cs_pmu.h │ ├── cs_config.h │ └── zcu_cs.h ├── main │ ├── hello_ETM.c │ ├── start_mp.c │ ├── start_etm_pmu.c │ ├── start_etr.c │ ├── start_sram.c │ └── start_cnt_pmu_event.c ├── Makefile ├── README.md └── src │ └── zcu_cs.c ├── ETM_data_parser ├── Makefile ├── headers │ ├── handlers.h │ └── trace.h └── src │ ├── 
ctrace.c │ └── handlers.c ├── deformat ├── makefile ├── README.md └── deformat.c └── support └── enable_arm_pmu.c /paper_imp/cfg/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /csc/util/devmem: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wchen258/TPAw0v/HEAD/csc/util/devmem -------------------------------------------------------------------------------- /paper_imp/demo/application/mser: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wchen258/TPAw0v/HEAD/paper_imp/demo/application/mser -------------------------------------------------------------------------------- /paper_imp/demo/application/sift: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wchen258/TPAw0v/HEAD/paper_imp/demo/application/sift -------------------------------------------------------------------------------- /paper_imp/demo/milestone_graphs/README.md: -------------------------------------------------------------------------------- 1 | # Milestone Graph (MG) and Timed Milestone Graph (TMG) 2 | 3 | 4 | -------------------------------------------------------------------------------- /paper_imp/demo/application/disparity: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wchen258/TPAw0v/HEAD/paper_imp/demo/application/disparity -------------------------------------------------------------------------------- /paper_imp/demo/application/tracking: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wchen258/TPAw0v/HEAD/paper_imp/demo/application/tracking -------------------------------------------------------------------------------- 
/paper_imp/demo/application/texture_synthesis: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wchen258/TPAw0v/HEAD/paper_imp/demo/application/texture_synthesis -------------------------------------------------------------------------------- /paper_imp/demo/milestone_graphs/tmg/mser.ttmsg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wchen258/TPAw0v/HEAD/paper_imp/demo/milestone_graphs/tmg/mser.ttmsg -------------------------------------------------------------------------------- /paper_imp/demo/milestone_graphs/tmg/sift.ttmsg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wchen258/TPAw0v/HEAD/paper_imp/demo/milestone_graphs/tmg/sift.ttmsg -------------------------------------------------------------------------------- /csc/include/pmu_cc.h: -------------------------------------------------------------------------------- 1 | #ifndef PMU_CC 2 | #define PMU_CC 3 | 4 | #define PMCCNTR_EL0 0xf8 5 | #define PMCCFILTR_EL0 0x47c 6 | 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /paper_imp/demo/milestone_graphs/tmg/disparity.ttmsg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wchen258/TPAw0v/HEAD/paper_imp/demo/milestone_graphs/tmg/disparity.ttmsg -------------------------------------------------------------------------------- /paper_imp/demo/milestone_graphs/tmg/texture.ttmsg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wchen258/TPAw0v/HEAD/paper_imp/demo/milestone_graphs/tmg/texture.ttmsg -------------------------------------------------------------------------------- /paper_imp/demo/milestone_graphs/tmg/tracking.ttmsg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/wchen258/TPAw0v/HEAD/paper_imp/demo/milestone_graphs/tmg/tracking.ttmsg -------------------------------------------------------------------------------- /paper_imp/tracee/include/pmu_cc.h: -------------------------------------------------------------------------------- 1 | #ifndef PMU_CC 2 | #define PMU_CC 3 | 4 | #define PMCCNTR_EL0 0xf8 5 | #define PMCCFILTR_EL0 0x47c 6 | 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /paper_imp/tracer/src/platform_config.h: -------------------------------------------------------------------------------- 1 | #ifndef __PLATFORM_CONFIG_H_ 2 | #define __PLATFORM_CONFIG_H_ 3 | 4 | #define STDOUT_IS_PSU_UART 5 | #define UART_DEVICE_ID 0 6 | #endif 7 | -------------------------------------------------------------------------------- /paper_imp/tracer/src/handlers.h: -------------------------------------------------------------------------------- 1 | #ifndef HANDLERS_H_ 2 | #define HANDLERS_H_ 3 | 4 | #include "xil_printf.h" 5 | 6 | void report(const char* format, ... 
); 7 | 8 | #endif // HANDLERS_H_ 9 | -------------------------------------------------------------------------------- /ETM_data_parser/Makefile: -------------------------------------------------------------------------------- 1 | SRC_FILES := $(shell find src/*.c) 2 | 3 | all: ctrace 4 | 5 | ctrace: $(SRC_FILES) 6 | $(CC) -o ctrace $(SRC_FILES) -I headers/ -Wall -g 7 | 8 | clean: 9 | rm -f ctrace 10 | -------------------------------------------------------------------------------- /paper_imp/trc_parser_offline/Makefile: -------------------------------------------------------------------------------- 1 | SRC_FILES := $(shell find src/*.c) 2 | 3 | all: ctrace 4 | 5 | ctrace: $(SRC_FILES) 6 | $(CC) -o ctrace $(SRC_FILES) -I headers/ -Wall -g 7 | 8 | clean: 9 | rm -f ctrace 10 | -------------------------------------------------------------------------------- /paper_imp/tracer/src/handlers.c: -------------------------------------------------------------------------------- 1 | #include "handlers.h" 2 | 3 | void report(const char* format, ... ) { 4 | va_list args; 5 | va_start(args, format); 6 | xil_vprintf(format, args); 7 | va_end(args); 8 | xil_printf("\n\r"); 9 | } 10 | -------------------------------------------------------------------------------- /paper_imp/tracer/README.md: -------------------------------------------------------------------------------- 1 | # Tracer 2 | 3 | The bare-metal application runs on the Cortex-R5 serves as the tracer. The code include the monitoring logic. It also depicts how the tracer reads the Timed Milestone Graph to program the ETM dynamically. 
def derive_end_point(g):
    """Return the entry node and the list of exit nodes of graph *g*.

    Exit nodes are the nodes whose out-degree is zero, in ``g.nodes`` order.
    The entry node is the one whose first ``content`` element has the lowest
    ``addr``. If the nodes do not expose those attributes (AttributeError),
    the smallest node under its natural ordering is used instead.

    Args:
        g: graph-like object exposing ``nodes`` and ``out_degree(node)``.

    Returns:
        A ``(entry, exits)`` tuple.

    Raises:
        ValueError: if *g* has no nodes.
    """
    # Materialize the node view once; it is scanned up to three times below.
    nodes = list(g.nodes)
    if not nodes:
        raise ValueError("cannot derive end points of an empty graph")

    exits = [node for node in nodes if g.out_degree(node) == 0]

    try:
        entry = min(nodes, key=lambda node: node.content[0].addr)
    except AttributeError:
        # Plain (e.g. integer address) nodes carry no basic-block content.
        entry = min(nodes)
    return entry, exits
#!/bin/bash
#
# Run lean_cfg.py graph generation for a named test.
#
# Usage: testbench.sh <graph|disparity>
#   graph      run lean_cfg.py --graph for every application in APP
#   disparity  run lean_cfg.py --graph for disparity only

TEST=$1

# Quote every expansion so an empty or whitespace-containing argument cannot
# break the test expressions.
if [ -z "$TEST" ]; then
    # A missing argument is a usage error: report on stderr and fail.
    echo "Specify the test to run" >&2
    exit 1
fi

if [ "$TEST" == "graph" ]; then
    APP=("disparity" "mser" "sift" "tracking" "texture_synthesis" "svm" "multi_ncut")
    for app in "${APP[@]}"; do
        python3 lean_cfg.py --graph "$app"
    done
fi

if [ "$TEST" == "disparity" ]; then
    python3 lean_cfg.py --graph disparity
fi
/*
 * Minimal traced workload: print a greeting with the process id, write and
 * read back a small stack buffer to generate some memory traffic, then print
 * a farewell.
 */
int main(void) {

    printf("Hello, ETM! pid: %d\n", getpid());

    // Write through a buffer to generate some traffic.
    // The original demo size was 1024 * 1024 * 1024; it was reduced while
    // trying to get the counter to be meaningful.
    int size = 1024;

    // Neither `sum` nor `buffer` is otherwise observed, so an optimizing
    // build may delete the whole loop. `volatile` forces every store and
    // load to be emitted so the traffic actually exists.
    volatile int sum = 0;
    volatile char buffer[size];
    for (int i = 0; i < size; i++) {
        buffer[i] = (char)i;   // intentional truncation to one byte
        sum += buffer[i];
    }

    printf("Bye, ETM!\n");

    return 0;
}
# Build rules for the tracee-side controller binaries.
#
# Every target compiles the driver sources in src/ together with one entry
# point from main/. None of the targets produces a file named after itself
# except `bench`, so all of them are declared phony to force a rebuild.

CFLAGS = -Iinclude -Wall
SRC_FILES := $(shell find src/*.c)
CC=g++

.PHONY: all debug r5 mp mig bench cc clean

all: r5

debug:
	gcc -DDEBUG $(CFLAGS) $(SRC_FILES) main/start.cpp -o start

# Compile straight from the sources, as `mp` does, so that -DR5 is applied to
# the driver sources as well as to the entry point.
r5: $(SRC_FILES) main/start.cpp main/start_mp.cpp
	$(CC) -DR5 $(SRC_FILES) $(CFLAGS) main/start.cpp -o start
	$(CC) -DR5 $(SRC_FILES) $(CFLAGS) main/start_mp.cpp -o start_mp

mp: $(SRC_FILES) main/start_mp.cpp
	$(CC) -DR5 $(SRC_FILES) $(CFLAGS) main/start_mp.cpp -o start_mp

mig:
	g++ $(CFLAGS) $(SRC_FILES) main/start.cpp -o start

bench:
	g++ -Iinclude $(SRC_FILES) main/bench.cpp -Wall -o bench

cc:
	g++ $(CFLAGS) -no-pie $(SRC_FILES) main/pmcc.c -o pmcc
	objdump -d pmcc > pmcc.dp



clean:
	rm -rf start
	rm -rf bench
	rm -rf pmcc
	rm -rf start_mp
/*
 * Single-bit helpers for integer lvalues (typically device registers).
 *
 *   SET(x, y)    set bit y of x
 *   CHECK(x, y)  evaluate to 1 if bit y of x is set, otherwise 0
 *   CLEAR(x, y)  clear bit y of x
 *
 * The mask is built from an unsigned 64-bit one. With a plain `1`, bit 31
 * shifts into the sign bit of an int (undefined behaviour) and the negative
 * result sign-extends into the upper half of a 64-bit operand. An unsigned
 * 32-bit one would not work either: `~(1U << y)` zero-extends and would clear
 * the upper 32 bits of a 64-bit `x` in CLEAR.
 */
#define SET(x, y) ((x) |= (1ULL << (y)))
#define CHECK(x,y) (((x) & (1ULL << (y))) ? 1 : 0)
#define CLEAR(x,y) ((x) &= ~(1ULL << (y)))
def populate(benchmark, tar_rt=None):
    """Return the analysis inputs for a known benchmark.

    Args:
        benchmark: benchmark name; one of ``'sift'``, ``'mser'``,
            ``'disparity'``, ``'texture_synthesis'`` or ``'tracking'``.
        tar_rt: optional target routine name. It is only honoured for
            ``'sift'``; every other benchmark uses its fixed routine.

    Returns:
        ``(tar_bin, tar_rt, tar_strips)``: the disassembly file name
        (``<benchmark>.dp``), the target routine name, and the list of
        ``<benchmark>.<i>.strip`` file names.

    Raises:
        ValueError: if *benchmark* is not one of the known names.
    """
    # benchmark -> (default target routine, number of strip files)
    known = {
        'sift': ('sift', 1),
        'mser': ('mser', 1),
        'disparity': ('getDisparity', 9),
        'texture_synthesis': ('create_texture', 10),
        'tracking': ('main', 1),
    }

    try:
        default_rt, n_strips = known[benchmark]
    except KeyError:
        raise ValueError(
            f"unknown benchmark {benchmark!r}; expected one of {sorted(known)}"
        ) from None

    tar_bin = f'{benchmark}.dp'

    # NOTE(review): only sift accepts a caller-supplied routine; the other
    # benchmarks have always overwritten it. Confirm whether that is intended.
    if not (benchmark == 'sift' and tar_rt):
        tar_rt = default_rt

    tar_strips = [f'{benchmark}.{i}.strip' for i in range(n_strips)]

    return tar_bin, tar_rt, tar_strips
Auto set to 0b0"); 12 | } else { 13 | funnel->ctrl |= hold_time << 8; 14 | } 15 | } 16 | 17 | void tmc_set_mode(TMC_interface* tmc, enum tmc_mode mode) { 18 | tmc->mode = 0x0; 19 | tmc->mode |= mode; 20 | } 21 | 22 | void tmc_set_size(TMC_interface *tmc, uint32_t ram_size) 23 | { 24 | if(ram_size > 256*1024*1024) { 25 | printf("WARNING: TMC RAM set size is greater than 256MB\n"); 26 | } 27 | tmc->ram_size = ram_size/4 ; 28 | } 29 | 30 | void tmc_set_data_buf(TMC_interface *tmc, uint64_t addr) 31 | { 32 | tmc->data_buf_addr_low = (uint32_t) addr ; 33 | tmc->data_buf_addr_high = (uint32_t) (addr >> 32); 34 | } 35 | 36 | void tmc_set_axi(TMC_interface *tmc, int burst_len) 37 | { 38 | tmc->axi_ctrl = 0; 39 | tmc->axi_ctrl |= burst_len & 0xf << 8 ; 40 | CLEAR(tmc->axi_ctrl, 7); 41 | } 42 | 43 | void tmc_set_read_pt(TMC_interface *tmc, uint64_t addr) 44 | { 45 | tmc->ram_read_pt = (uint32_t) addr; 46 | tmc->ram_read_pt_high = (uint32_t) (addr >> 32); 47 | } 48 | 49 | void tmc_set_write_pt(TMC_interface *tmc, uint64_t addr) 50 | { 51 | tmc->ram_write_pt = (uint32_t) addr; 52 | tmc->ram_write_pt_high = (uint32_t) (addr >> 32); 53 | } 54 | 55 | 56 | -------------------------------------------------------------------------------- /paper_imp/demo/milestone_graphs/mg/tracking.reduced.dot: -------------------------------------------------------------------------------- 1 | digraph "" { 2 | node [label="\N", 3 | shape=record, 4 | style=filled 5 | ]; 6 | 4196776 [label="0x4009a8,0,0x0"]; 7 | 4196784 [label="0x4009b0,133480,0x20968"]; 8 | 4196776 -> 4196784 [label="133480,0x20968"]; 9 | 4196820 [label="0x4009d4,169231,0x2950f"]; 10 | 4196784 -> 4196820 [label="35767,0x8bb7"]; 11 | 4196828 [label="0x4009dc,180930,0x2c2c2"]; 12 | 4196820 -> 4196828 [label="11721,0x2dc9"]; 13 | 4196840 [label="0x4009e8,200420,0x30ee4"]; 14 | 4196828 -> 4196840 [label="19497,0x4c29"]; 15 | 4196852 [label="0x4009f4,223396,0x368a4"]; 16 | 4196840 -> 4196852 [label="23067,0x5a1b"]; 17 | 4196876 
[label="0x400a0c,303255,0x4a097"]; 18 | 4196852 -> 4196876 [label="79912,0x13828"]; 19 | 4196900 [label="0x400a24,310136,0x4bb78"]; 20 | 4196876 -> 4196900 [label="6915,0x1b03"]; 21 | 4196916 [label="0x400a34,334463,0x51a7f"]; 22 | 4196900 -> 4196916 [label="24536,0x5fd8"]; 23 | 4196924 [label="0x400a3c,334473,0x51a89"]; 24 | 4196916 -> 4196924 [label="10,0xa"]; 25 | 4196936 [label="0x400a48,334574,0x51aee"]; 26 | 4196924 -> 4196936 [label="179,0xb3"]; 27 | 4197156 [label="0x400b24,339046,0x52c66"]; 28 | 4196936 -> 4197156 [label="4491,0x118b"]; 29 | 4197236 [label="0x400b74,1012490,0xf730a"]; 30 | 4197156 -> 4197236 [label="163670,0x27f56"]; 31 | 4197484 [label="0x400c6c,1118278,0x111046"]; 32 | 4197236 -> 4197484 [label="106313,0x19f49"]; 33 | 4197720 [label="0x400d58,1389032,0x1531e8"]; 34 | 4197484 -> 4197720 [label="283481,0x45359"]; 35 | 4197720 -> 4197236 [label="165522,0x28692"]; 36 | 4197916 [label="0x400e1c,1390811,0x1538db"]; 37 | 4197720 -> 4197916 [label="1822,0x71e"]; 38 | } 39 | -------------------------------------------------------------------------------- /csc/include/pmu_event.h: -------------------------------------------------------------------------------- 1 | #ifndef PMU_EVENT_H 2 | #define PMU_EVENT_H 3 | 4 | // PMU EVENT and External Bus to ETM (post fix _T) 5 | 6 | #define L1I_CACHE_REFILL 0x1 7 | #define L1I_CACHE_REFILL_T 0 8 | #define L1I_TLB_REFILL 0x2 9 | #define L1I_TLB_REFILL_T 1 10 | #define L1D_CACHE_REFILL 0x3 11 | #define L1D_CACHE_REFILL_T 2 12 | #define L1D_CACHE 0x4 13 | #define L1D_CACHE_T 3 14 | #define INST_RETIRED 0x8 15 | #define INST_RETIRED_T 7 16 | #define L1I_CACHE 0x14 17 | #define L1I_CACHE_T 18 18 | #define L1D_CACHE_WB 0x15 19 | #define L1D_CACHE_WB_T 19 20 | #define L2D_CACHE 0x16 21 | #define L2D_CACHE_T 20 22 | #define L2D_CACHE_REFILL 0x17 23 | #define L2D_CACHE_REFILL_T 21 24 | #define L2D_CACHE_WB 0x18 25 | #define L2D_CACHE_WB_T 22 26 | #define LD_RETIRED 0x6 27 | #define LD_RETIRED_T 5 28 | #define 
ST_RETIRED 0x7 29 | #define ST_RETIRED_T 6 30 | #define EXC_TAKEN 0x9 31 | #define EXC_TAKEN_T 9 32 | #define EXC_RETURN 0xa 33 | #define EXC_RETURN_T 10 34 | #define CID_WRITE_RETIRED 0xb 35 | #define CID_WRITE_RETIRED_T 11 36 | #define PC_WRITE_RETIRED 0xc 37 | #define PC_WRITE_RETIRED_T 12 38 | #define BR_IMMED_RETIRED 0xd 39 | #define BR_IMMED_RETIRED_T 13 40 | #define UNALIGNED_LDST_RETIRED 0xf 41 | #define UNALIGNED_LDST_RETIRED_T 14 42 | #define BR_MIS_PRED 0x10 43 | #define BR_MIS_PRED_T 15 44 | #define BR_PRED 0x12 45 | #define BR_PRED_T 16 46 | #define MEM_ACCESS 0x13 47 | #define MEM_ACCESS_T 17 48 | #define L1I_CACHE_ERR 0xd0 49 | #define L1I_CACHE_ERR_T 23 50 | #define L1D_CACHE_ERR 0xd1 51 | #define L1D_CACHE_ERR_T 24 52 | #define TLB_MEM_ERR 0xd2 53 | #define TLB_MEM_ERR_T 25 54 | 55 | 56 | #endif -------------------------------------------------------------------------------- /paper_imp/tracee/include/pmu_event.h: -------------------------------------------------------------------------------- 1 | #ifndef PMU_EVENT_H 2 | #define PMU_EVENT_H 3 | 4 | // PMU EVENT and External Bus to ETM (post fix _T) 5 | 6 | #define L1I_CACHE_REFILL 0x1 7 | #define L1I_CACHE_REFILL_T 0 8 | #define L1I_TLB_REFILL 0x2 9 | #define L1I_TLB_REFILL_T 1 10 | #define L1D_CACHE_REFILL 0x3 11 | #define L1D_CACHE_REFILL_T 2 12 | #define L1D_CACHE 0x4 13 | #define L1D_CACHE_T 3 14 | #define INST_RETIRED 0x8 15 | #define INST_RETIRED_T 7 16 | #define L1I_CACHE 0x14 17 | #define L1I_CACHE_T 18 18 | #define L1D_CACHE_WB 0x15 19 | #define L1D_CACHE_WB_T 19 20 | #define L2D_CACHE 0x16 21 | #define L2D_CACHE_T 20 22 | #define L2D_CACHE_REFILL 0x17 23 | #define L2D_CACHE_REFILL_T 21 24 | #define L2D_CACHE_WB 0x18 25 | #define L2D_CACHE_WB_T 22 26 | #define LD_RETIRED 0x6 27 | #define LD_RETIRED_T 5 28 | #define ST_RETIRED 0x7 29 | #define ST_RETIRED_T 6 30 | #define EXC_TAKEN 0x9 31 | #define EXC_TAKEN_T 9 32 | #define EXC_RETURN 0xa 33 | #define EXC_RETURN_T 10 34 | #define 
CID_WRITE_RETIRED 0xb 35 | #define CID_WRITE_RETIRED_T 11 36 | #define PC_WRITE_RETIRED 0xc 37 | #define PC_WRITE_RETIRED_T 12 38 | #define BR_IMMED_RETIRED 0xd 39 | #define BR_IMMED_RETIRED_T 13 40 | #define UNALIGNED_LDST_RETIRED 0xf 41 | #define UNALIGNED_LDST_RETIRED_T 14 42 | #define BR_MIS_PRED 0x10 43 | #define BR_MIS_PRED_T 15 44 | #define BR_PRED 0x12 45 | #define BR_PRED_T 16 46 | #define MEM_ACCESS 0x13 47 | #define MEM_ACCESS_T 17 48 | #define L1I_CACHE_ERR 0xd0 49 | #define L1I_CACHE_ERR_T 23 50 | #define L1D_CACHE_ERR 0xd1 51 | #define L1D_CACHE_ERR_T 24 52 | #define TLB_MEM_ERR 0xd2 53 | #define TLB_MEM_ERR_T 25 54 | 55 | 56 | #endif -------------------------------------------------------------------------------- /paper_imp/tracer/src/platform.h: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * 3 | * Copyright (C) 2008 - 2014 Xilinx, Inc. All rights reserved. 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in 13 | * all copies or substantial portions of the Software. 14 | * 15 | * Use of the Software is limited solely to applications: 16 | * (a) running on a Xilinx device, or 17 | * (b) that interact with a Xilinx device through a bus or interconnect. 
18 | * 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 22 | * XILINX BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 23 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF 24 | * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | * SOFTWARE. 26 | * 27 | * Except as contained in this notice, the name of the Xilinx shall not be used 28 | * in advertising or otherwise to promote the sale, use or other dealings in 29 | * this Software without prior written authorization from Xilinx. 30 | * 31 | ******************************************************************************/ 32 | 33 | #ifndef __PLATFORM_H_ 34 | #define __PLATFORM_H_ 35 | 36 | #include "platform_config.h" 37 | 38 | void init_platform(); 39 | void cleanup_platform(); 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /paper_imp/cfg/msg_draw.py: -------------------------------------------------------------------------------- 1 | # This file provides a method to construct a milestone graph manually 2 | # Combined with the visualization of the trace data on applications, this can be done meaningfully 3 | # as longs as the number of unfolding function is not large 4 | 5 | import networkx as nx 6 | 7 | def draw_tracking(): 8 | g = nx.DiGraph() 9 | spine = [0x4009a8, 0x4009b0, 0x4009d4, 0x4009dc, 0x4009e8, 0x4009f4, 0x400a0c, 0x400a24, 0x400a34, 0x400a3c, 0x400a48] 10 | 11 | for i in range(len(spine)): 12 | if i==0: 13 | pass 14 | else: 15 | g.add_edge(spine[i-1],spine[i]) 16 | 17 | g.add_edge(spine[-1], 0x400b24) 18 | g.add_edge(0x400b24, 0x400b74) 19 | g.add_edge(0x400b74, 0x400c6c) 20 | # g.add_edge(0x400c6c, 0x400c74) 21 | g.add_edge(0x400c6c, 0x400d58) 22 | 23 | # g.add_edge(0x400c74, 0x400d44) 24 | 25 | # 
def draw_texture():
    """Return the manually constructed milestone graph for ``draw_texture``.

    The graph is a ``networkx.DiGraph`` over integer addresses: a linear
    chain of five nodes, plus a self-loop on two of the chain's nodes.
    """
    graph = nx.DiGraph()

    # Main chain: each address points at the one that follows it.
    chain = [0x405010, 0x4054e4, 0x4051d8, 0x4052a8, 0x40541c]
    for src, dst in zip(chain, chain[1:]):
        graph.add_edge(src, dst)

    # Nodes that may repeat: give each an edge back to itself.
    for looping in (0x4051d8, 0x4052a8):
        graph.add_edge(looping, looping)

    return graph
[label="*** Reason for MS *** 31 | \l12700814 has exceeded thresh 10000 32 | \l*** Block Info *** 33 | \lcreate_texture 4054e4 ldr 34 | \lcreate_texture 4054e8 \ 35 | b E:405074 "]; 36 | "BB 0x405010 - 0x40504c type: bl" -> "BB 0x4054e4 - 0x4054e8 type: b" [label="{}"]; 37 | "BB 0x4051f4 - 0x4051f8 type: b.ge" [label="*** Reason for MS *** 38 | \la valid valve 39 | \l*** Block Info *** 40 | \lcreate_texture 4051f4 cmp 41 | \lcreate_texture 4051f8 b.ge E:40530c "]; 42 | "BB 0x4054e4 - 0x4054e8 type: b" -> "BB 0x4051f4 - 0x4051f8 type: b.ge" [label="{}"]; 43 | "BB 0x40541c - 0x40542c type: b.lt" [label="*** Reason for MS *** 44 | \la valid valve 45 | \l*** Block Info *** 46 | \lcreate_texture 40541c ldr 47 | \lcreate_texture 405420 add 48 | \lcreate_\ 49 | texture 405424 add 50 | \lcreate_texture 405428 cmp 51 | \lcreate_texture 40542c b.lt E:405324 "]; 52 | "BB 0x4051f4 - 0x4051f8 type: b.ge" -> "BB 0x40541c - 0x40542c type: b.lt" [label="{}"]; 53 | } 54 | -------------------------------------------------------------------------------- /paper_imp/demo/milestone_graphs/mg/texture_synthesis.create_texture.tmsg.dot: -------------------------------------------------------------------------------- 1 | digraph "" { 2 | node [label="\N", 3 | shape=record, 4 | style=filled 5 | ]; 6 | "BB 0x405010 - 0x40504c type: bl" [label="*** Reason for MS *** 7 | \lEntry 8 | \l*** Block Info *** 9 | \lcreate_texture 405010 stp 10 | \lcreate_texture 405014 mov 11 | \lcreate_texture \ 12 | 405018 stp 13 | \lcreate_texture 40501c mov 14 | \lcreate_texture 405020 stp 15 | \lcreate_texture 405024 mov 16 | \lcreate_texture 405028 ldp 17 | \lcreate_\ 18 | texture 40502c stp 19 | \lcreate_texture 405030 str 20 | \lcreate_texture 405034 stp 21 | \lcreate_texture 405038 madd 22 | \lcreate_texture \ 23 | 40503c stp 24 | \lcreate_texture 405040 add 25 | \lcreate_texture 405044 sbfiz 26 | \lcreate_texture 405048 mov 27 | \lcreate_texture 40504c \ 28 | bl E:400920 29 | \lmalloc@plt"]; 30 | "BB 
0x4054e4 - 0x4054e8 type: b" [label="*** Reason for MS *** 31 | \l12700814 has exceeded thresh 10000 32 | \l*** Block Info *** 33 | \lcreate_texture 4054e4 ldr 34 | \lcreate_texture 4054e8 \ 35 | b E:405074 "]; 36 | "BB 0x405010 - 0x40504c type: bl" -> "BB 0x4054e4 - 0x4054e8 type: b" [label="{}"]; 37 | "BB 0x4051f4 - 0x4051f8 type: b.ge" [label="*** Reason for MS *** 38 | \la valid valve 39 | \l*** Block Info *** 40 | \lcreate_texture 4051f4 cmp 41 | \lcreate_texture 4051f8 b.ge E:40530c "]; 42 | "BB 0x4054e4 - 0x4054e8 type: b" -> "BB 0x4051f4 - 0x4051f8 type: b.ge" [label="{}"]; 43 | "BB 0x40541c - 0x40542c type: b.lt" [label="*** Reason for MS *** 44 | \la valid valve 45 | \l*** Block Info *** 46 | \lcreate_texture 40541c ldr 47 | \lcreate_texture 405420 add 48 | \lcreate_\ 49 | texture 405424 add 50 | \lcreate_texture 405428 cmp 51 | \lcreate_texture 40542c b.lt E:405324 "]; 52 | "BB 0x4051f4 - 0x4051f8 type: b.ge" -> "BB 0x40541c - 0x40542c type: b.lt" [label="{}"]; 53 | } 54 | -------------------------------------------------------------------------------- /paper_imp/tracee/include/zcu_cs.h: -------------------------------------------------------------------------------- 1 | #ifndef ZCU_CS_H 2 | #define ZCU_CS_H 3 | 4 | #define CS_BASE 0xFE800000 5 | 6 | #define ROM 0x0 7 | #define TSGEN 0x100000 8 | #define FUNNEL0 0x110000 9 | #define FUNNEL1 0x120000 10 | #define FUNNEL2 0x130000 11 | #define TMC1 0x140000 12 | #define TMC2 0x150000 13 | #define REPLIC 0x160000 14 | #define TMC3 0x170000 15 | #define TPIU 0x180000 16 | #define CTI0 0x190000 17 | #define CTI1 0x1A0000 18 | #define CTI2 0x1B0000 19 | #define STM 0x1C0000 20 | #define FTM 0x1D0000 21 | 22 | #define R5_ROM 0x3E0000 23 | #define R5_0_DEBUG 0x3F0000 24 | #define R5_1_DEBUG 0x3F2000 25 | #define R5_0_CTI 0x3F8000 26 | #define R5_1_CTI 0x3F9000 27 | #define R5_0_ETM 0x3FC000 28 | #define R5_1_ETM 0x3FD000 29 | 30 | 31 | #define R5_0_ATCM 0xFFE00000 32 | #define R5_0_BTCM 0xFFE20000 33 | 34 | 
#define A53_ROM 0x400000 35 | #define A53_0_DEBUG 0x410000 36 | #define A53_0_CTI 0x420000 37 | #define A53_0_PMU 0x430000 38 | #define A53_0_ETM 0x440000 39 | #define A53_1_DEBUG 0x510000 40 | #define A53_1_CTI 0x520000 41 | #define A53_1_PMU 0x530000 42 | #define A53_1_ETM 0x540000 43 | #define A53_2_DEBUG 0x610000 44 | #define A53_2_CTI 0x620000 45 | #define A53_2_PMU 0x630000 46 | #define A53_2_ETM 0x640000 47 | #define A53_3_DEBUG 0x710000 48 | #define A53_3_CTI 0x720000 49 | #define A53_3_PMU 0x730000 50 | #define A53_3_ETM 0x740000 51 | 52 | #define CHECK(r,p) ( (r & 0x1 << (p) ) >> (p) ) 53 | #define SET(r,p) ( r |= 0x1 << (p) ) 54 | #define CLEAR(r,p) ( r &= ~(0x1 << (p) ) ) 55 | 56 | enum component { 57 | Rom, 58 | Tsgen, 59 | Funnel0, 60 | Funnel1, 61 | Funnel2, 62 | Tmc1, 63 | Tmc2, 64 | Tmc3, 65 | Replic, 66 | Tpiu, 67 | Cti0, 68 | Cti1, 69 | Cti2, 70 | Stm, 71 | Ftm, 72 | R5_rom, 73 | R5_0_debug, 74 | R5_1_debug, 75 | R5_0_cti, 76 | R5_1_cti, 77 | R5_0_etm, 78 | R5_1_etm, 79 | A53_rom, 80 | A53_0_debug, 81 | A53_0_cti, 82 | A53_0_pmu, 83 | A53_0_etm, 84 | A53_1_etm, 85 | A53_2_etm, 86 | A53_3_etm, 87 | A53_1_pmu, 88 | A53_2_pmu, 89 | A53_3_pmu 90 | }; 91 | 92 | void* cs_register(enum component); 93 | 94 | #endif -------------------------------------------------------------------------------- /csc/include/cs_pmu.h: -------------------------------------------------------------------------------- 1 | #ifndef CS_PMU 2 | #define CS_PMU 3 | #include 4 | 5 | #define PMCCNTR_EL0 0xf8 6 | #define PMCCFILTR_EL0 0x47c 7 | 8 | #define PAD( start, end ) JOIN( char pad , __COUNTER__ [end - start] ) 9 | #define JOIN( symbol1, symbol2 ) _DO_JOIN( symbol1, symbol2 ) 10 | #define _DO_JOIN( symbol1, symbol2 ) symbol1##symbol2 11 | 12 | typedef struct __attribute__((__packed__)) PMU_interface { 13 | uint32_t evt_ct_0; 14 | PAD(0x4, 0x8); 15 | uint32_t evt_ct_1; 16 | PAD(0xc,0x10); 17 | uint32_t evt_ct_2; 18 | PAD(0x14,0x18); 19 | uint32_t evt_ct_3; 20 | PAD(0x1c,0x20); 
21 | uint32_t evt_ct_4; 22 | PAD(0x24,0x28); 23 | uint32_t evt_ct_5; 24 | PAD(0x2c,0xf8); 25 | uint64_t cc; 26 | PAD(0x100,0x400); 27 | uint32_t evt_t_0; 28 | uint32_t evt_t_1; 29 | uint32_t evt_t_2; 30 | uint32_t evt_t_3; 31 | uint32_t evt_t_4; 32 | uint32_t evt_t_5; 33 | PAD(0x418,0x47c); 34 | uint32_t cc_filter; 35 | PAD(0x480,0xc00); 36 | uint32_t ct_en_set; 37 | PAD(0xc04,0xc20); 38 | uint32_t ct_en_clear; 39 | PAD(0xc24,0xc40); 40 | uint32_t int_en_set; 41 | PAD(0xc44,0xc60); 42 | uint32_t int_en_clear; 43 | PAD(0xc64,0xc80); 44 | uint32_t overflow_flag_status; 45 | PAD(0xc84,0xca0); 46 | uint32_t software_inc; 47 | PAD(0xca4,0xcc0); 48 | uint32_t overflow_flag_status_set; 49 | PAD(0xcc4,0xe00); 50 | uint32_t config; 51 | uint32_t ctrl; 52 | PAD(0xe08,0xe20); 53 | uint32_t comm_evt_id_0; 54 | uint32_t comm_evt_id_1; 55 | PAD(0xe28,0xfa8); 56 | int32_t dev_aff_0; 57 | int32_t dev_aff_1; 58 | int32_t lock_access; 59 | int32_t lock_status; 60 | int32_t auth_status; 61 | int32_t dev_arch; 62 | PAD(0xfc0,0xfcc); 63 | int32_t dev_type; 64 | int32_t peripheral_id_4; 65 | int32_t peripheral_id_5; 66 | int32_t peripheral_id_6; 67 | int32_t peripheral_id_7; 68 | int32_t peripheral_id_0; 69 | int32_t peripheral_id_1; 70 | int32_t peripheral_id_2; 71 | int32_t peripheral_id_3; 72 | int32_t comp_id_0; 73 | int32_t comp_id_1; 74 | int32_t comp_id_2; 75 | int32_t comp_id_3; 76 | 77 | 78 | } PMU_interface ; 79 | 80 | 81 | #endif -------------------------------------------------------------------------------- /paper_imp/tracee/main/bench.cpp: -------------------------------------------------------------------------------- 1 | #include "buffer.h" 2 | #include "cs_etm.h" 3 | #include "cs_config.h" 4 | #include "cs_soc.h" 5 | #include "pmu_event.h" 6 | #include "zcu_cs.h" 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 
| #include 23 | 24 | using namespace std; 25 | 26 | extern ETM_interface *etm; 27 | 28 | int main(int argc, char *argv[]) { 29 | 30 | uint64_t buf_addr = 0xb0000000; 31 | uint32_t buf_size = 256 * 1024 * 1024; 32 | 33 | char app[256]; 34 | char *app_farg = NULL; 35 | char ms_path[256]; 36 | uint64_t start_addr, end_addr; 37 | ms_t ms_mode; 38 | 39 | parse_args(argc, argv, app, &app_farg, ms_path, &ms_mode, &start_addr, &end_addr); 40 | clear_buffer(buf_addr, buf_size); 41 | 42 | // config Coresight infrascture 43 | cs_config_etr(buf_addr, buf_size); 44 | config_etm(); 45 | 46 | // child would execl the target application, parent would wait till finish and 47 | // collect the results 48 | pid_t pid = 0; 49 | pid = fork(); 50 | if (pid == 0) { 51 | 52 | // pin child to core 0 53 | cpu_set_t set; 54 | CPU_ZERO(&set); 55 | CPU_SET(0, &set); 56 | sched_setaffinity(0, sizeof(cpu_set_t), &set); 57 | sched_yield(); 58 | 59 | // ETM only trace child pid 60 | uint64_t child_pid = getpid(); 61 | etm_set_contextid_cmp(etm, (uint64_t)child_pid); 62 | 63 | etm_register_start_stop_addr(etm, start_addr, end_addr); 64 | etm_set_stall(etm, 0b1111); // for mser, we need this to be accurate 65 | 66 | // enable ETM and run the application 67 | etm_enable(etm); 68 | execl(app, app, app_farg, NULL); 69 | fprintf(stderr, "ERROR: execl failed.\n"); 70 | 71 | } else if (pid > 0) { 72 | wait(NULL); 73 | sleep(1); 74 | etm_disable(etm); 75 | dump_buffer(buf_addr, buf_size); 76 | return 0; 77 | } else { 78 | perror("Fork failed\n"); 79 | return 1; 80 | } 81 | 82 | return 0; 83 | } 84 | -------------------------------------------------------------------------------- /paper_imp/tracee/include/cs_pmu.h: -------------------------------------------------------------------------------- 1 | #ifndef CS_PMU 2 | #define CS_PMU 3 | #include 4 | 5 | #define PMCCNTR_EL0 0xf8 6 | #define PMCCFILTR_EL0 0x47c 7 | 8 | #define PAD( start, end ) JOIN( char pad , __COUNTER__ [end - start] ) 9 | #define JOIN( 
symbol1, symbol2 ) _DO_JOIN( symbol1, symbol2 ) 10 | #define _DO_JOIN( symbol1, symbol2 ) symbol1##symbol2 11 | 12 | typedef struct __attribute__((__packed__)) pmu_interface { 13 | uint32_t evt_ct_0; 14 | PAD(0x4, 0x8); 15 | uint32_t evt_ct_1; 16 | PAD(0xc,0x10); 17 | uint32_t evt_ct_2; 18 | PAD(0x14,0x18); 19 | uint32_t evt_ct_3; 20 | PAD(0x1c,0x20); 21 | uint32_t evt_ct_4; 22 | PAD(0x24,0x28); 23 | uint32_t evt_ct_5; 24 | PAD(0x2c,0xf8); 25 | uint64_t cc; 26 | PAD(0x100,0x400); 27 | uint32_t evt_t_0; 28 | uint32_t evt_t_1; 29 | uint32_t evt_t_2; 30 | uint32_t evt_t_3; 31 | uint32_t evt_t_4; 32 | uint32_t evt_t_5; 33 | PAD(0x418,0x47c); 34 | uint32_t cc_filter; 35 | PAD(0x480,0xc00); 36 | uint32_t ct_en_set; 37 | PAD(0xc04,0xc20); 38 | uint32_t ct_en_clear; 39 | PAD(0xc24,0xc40); 40 | uint32_t int_en_set; 41 | PAD(0xc44,0xc60); 42 | uint32_t int_en_clear; 43 | PAD(0xc64,0xc80); 44 | uint32_t overflow_flag_status; 45 | PAD(0xc84,0xca0); 46 | uint32_t software_inc; 47 | PAD(0xca4,0xcc0); 48 | uint32_t overflow_flag_status_set; 49 | PAD(0xcc4,0xe00); 50 | uint32_t config; 51 | uint32_t ctrl; 52 | PAD(0xe08,0xe20); 53 | uint32_t comm_evt_id_0; 54 | uint32_t comm_evt_id_1; 55 | PAD(0xe28,0xfa8); 56 | int32_t dev_aff_0; 57 | int32_t dev_aff_1; 58 | int32_t lock_access; 59 | int32_t lock_status; 60 | int32_t auth_status; 61 | int32_t dev_arch; 62 | PAD(0xfc0,0xfcc); 63 | int32_t dev_type; 64 | int32_t peripheral_id_4; 65 | int32_t peripheral_id_5; 66 | int32_t peripheral_id_6; 67 | int32_t peripheral_id_7; 68 | int32_t peripheral_id_0; 69 | int32_t peripheral_id_1; 70 | int32_t peripheral_id_2; 71 | int32_t peripheral_id_3; 72 | int32_t comp_id_0; 73 | int32_t comp_id_1; 74 | int32_t comp_id_2; 75 | int32_t comp_id_3; 76 | 77 | 78 | } PMU_interface ; 79 | 80 | 81 | #endif -------------------------------------------------------------------------------- /paper_imp/tracee/src/buffer.c: -------------------------------------------------------------------------------- 1 | 
#include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | uint32_t *get_buf_ptr(uint64_t buf_addr, uint32_t buf_size) 9 | { 10 | void* ptr = NULL; 11 | int fd = open("/dev/mem", O_RDWR); 12 | if (fd < 0) { 13 | perror("Cannot open /dev/mem\n"); 14 | exit(1); 15 | } 16 | ptr = mmap(NULL, buf_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, buf_addr); 17 | if (ptr == MAP_FAILED) 18 | fprintf(stderr,"mmap to buffer failed!\n"); 19 | close(fd); 20 | return (uint32_t *) ptr; 21 | } 22 | 23 | void clear_buffer(uint64_t buf_addr, uint32_t buf_size) 24 | { 25 | printf("Clearing Buffer...\n"); 26 | uint32_t *ptr = get_buf_ptr(buf_addr, buf_size); 27 | volatile uint32_t *buf = ptr; 28 | for(uint32_t i=0; i 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "trace.h" 8 | 9 | extern void trace_loop(void); 10 | 11 | static uint8_t * trace_buffer; 12 | static uint32_t buffer_size; 13 | static uint32_t buffer_pointer; 14 | 15 | uint32_t read_data(uint8_t* buffer, uint32_t bytes, uint8_t advance_pointer) { 16 | uint32_t read; 17 | 18 | for (read = 0; read < bytes && buffer_pointer + read < buffer_size; ++read) { 19 | buffer[read] = trace_buffer[buffer_pointer + read]; 20 | } 21 | 22 | if (advance_pointer) { 23 | buffer_pointer += read; 24 | } 25 | 26 | return read; 27 | } 28 | 29 | uint32_t advance_pointer(uint32_t offset) { 30 | buffer_pointer += offset; 31 | if (buffer_pointer >= buffer_size) 32 | buffer_pointer = buffer_size; 33 | 34 | return buffer_pointer; 35 | } 36 | 37 | uint8_t data_available() { 38 | return buffer_pointer < buffer_size; 39 | } 40 | 41 | int main(int argc, char const *argv[]) { 42 | //int ctl_flow_fd; 43 | //struct stat ctl_flow_stat; 44 | //void * ctl_ptr; 45 | FILE * trace_file; 46 | char * line = NULL; 47 | unsigned int line_hex; 48 | size_t len = 0; 49 | ssize_t read; 50 | 51 | if (argc < 2) { 52 | fprintf(stderr, "Usage: ./ctrace [trace_input_file]\n"); 53 | exit(EXIT_FAILURE); 54 | } 55 | 56 | 
trace_buffer = (uint8_t *) malloc(256 * 1024 * 1024); // 64 mb?? 57 | buffer_size = 0; 58 | 59 | trace_file = fopen(argv[1], "r"); 60 | if (trace_file == NULL) { 61 | fprintf(stderr, "Error opening input file %s\n", argv[1]); 62 | exit(EXIT_FAILURE); 63 | } 64 | 65 | while ((read = getline(&line, &len, trace_file)) != -1) { 66 | sscanf(line, "%x", &line_hex); 67 | trace_buffer[buffer_size++] = line_hex & 0xFF; 68 | trace_buffer[buffer_size++] = (line_hex >> 8) & 0xFF; 69 | trace_buffer[buffer_size++] = (line_hex >> 16) & 0xFF; 70 | trace_buffer[buffer_size++] = (line_hex >> 24) & 0xFF; 71 | } 72 | 73 | fclose(trace_file); 74 | 75 | printf("Done reading file, read %d bytes (should be %d lines)\n", buffer_size, buffer_size / 4); 76 | 77 | trace_loop(); 78 | 79 | return 0; 80 | } 81 | -------------------------------------------------------------------------------- /csc/include/zcu_cs.h: -------------------------------------------------------------------------------- 1 | #ifndef ZCU_CS_H 2 | #define ZCU_CS_H 3 | 4 | #include "common.h" 5 | 6 | #define CS_BASE 0xFE800000 7 | 8 | #define ROM 0x0 9 | #define TSGEN 0x100000 10 | #define FUNNEL0 0x110000 11 | #define FUNNEL1 0x120000 12 | #define FUNNEL2 0x130000 13 | #define TMC1 0x140000 14 | #define TMC2 0x150000 15 | #define REPLIC 0x160000 16 | #define TMC3 0x170000 17 | #define TPIU 0x180000 18 | #define CTI0 0x190000 19 | #define CTI1 0x1A0000 20 | #define CTI2 0x1B0000 21 | #define STM 0x1C0000 22 | #define FTM 0x1D0000 23 | 24 | #define R5_ROM 0x3E0000 25 | #define R5_0_DEBUG 0x3F0000 26 | #define R5_1_DEBUG 0x3F2000 27 | #define R5_0_CTI 0x3F8000 28 | #define R5_1_CTI 0x3F9000 29 | #define R5_0_ETM 0x3FC000 30 | #define R5_1_ETM 0x3FD000 31 | 32 | 33 | #define R5_0_ATCM 0xFFE00000 34 | #define R5_0_BTCM 0xFFE20000 35 | 36 | #define A53_ROM 0x400000 37 | #define A53_0_DEBUG 0x410000 38 | #define A53_0_CTI 0x420000 39 | #define A53_0_PMU 0x430000 40 | #define A53_0_ETM 0x440000 41 | #define A53_1_DEBUG 0x510000 42 | 
#define A53_1_CTI 0x520000 43 | #define A53_1_PMU 0x530000 44 | #define A53_1_ETM 0x540000 45 | #define A53_2_DEBUG 0x610000 46 | #define A53_2_CTI 0x620000 47 | #define A53_2_PMU 0x630000 48 | #define A53_2_ETM 0x640000 49 | #define A53_3_DEBUG 0x710000 50 | #define A53_3_CTI 0x720000 51 | #define A53_3_PMU 0x730000 52 | #define A53_3_ETM 0x740000 53 | 54 | 55 | 56 | // #define CHECK(r,p) ( (r & 0x1 << (p) ) >> (p) ) 57 | // #define SET(r,p) ( r |= 0x1 << (p) ) 58 | // #define CLEAR(r,p) ( r &= ~(0x1 << (p) ) ) 59 | 60 | enum component { 61 | Rom, 62 | Tsgen, 63 | Funnel0, 64 | Funnel1, 65 | Funnel2, 66 | Tmc1, 67 | Tmc2, 68 | Tmc3, 69 | Replic, 70 | Tpiu, 71 | Cti0, 72 | Cti1, 73 | Cti2, 74 | Stm, 75 | Ftm, 76 | R5_rom, 77 | R5_0_debug, 78 | R5_1_debug, 79 | R5_0_cti, 80 | R5_1_cti, 81 | R5_0_etm, 82 | R5_1_etm, 83 | A53_rom, 84 | A53_0_debug, 85 | A53_0_cti, 86 | A53_0_pmu, 87 | A53_0_etm, 88 | A53_1_debug, 89 | A53_1_cti, 90 | A53_1_pmu, 91 | A53_1_etm, 92 | A53_2_debug, 93 | A53_2_cti, 94 | A53_2_pmu, 95 | A53_2_etm, 96 | A53_3_debug, 97 | A53_3_cti, 98 | A53_3_pmu, 99 | A53_3_etm, 100 | }; 101 | 102 | volatile void* cs_register(enum component); 103 | 104 | #endif 105 | -------------------------------------------------------------------------------- /paper_imp/cfg/_cfg_rt_eval.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import sys 3 | 4 | def rt_eval(self, rt): 5 | d = self.solve_rt(rt) 6 | g = d['nx'] 7 | sub_rts = self.gather_rts(rt) 8 | for sub_rt in sub_rts: 9 | if sub_rt.weight is None: 10 | if not self.special_rt(sub_rt): 11 | sub_rt.weight, _, _ = self.rt_eval(sub_rt) 12 | self.attach_weight(g) 13 | total, ps = self.calc_smallest_weight(g, d) 14 | return total, ps, g 15 | 16 | def gather_rts(self, rt): 17 | bbs = self.find_rt_bb(rt) 18 | sub_rts = set(list(bb.e_succ_bb.rt for bb in bbs if bb.end_ins.ins == 'bl')) 19 | return sub_rts 20 | 21 | def attach_weight(self, g): 22 | for node 
def calc_smallest_weight(self, g, d):
    """Find the lightest entry-to-exit path of a routine graph.

    For every node in ``d['exits']`` a shortest path from ``d['entry']`` is
    computed with ``nx.shortest_path`` over the ``'weight'`` edge attribute.
    Each candidate path is scored by summing the ``'weight'`` attribute of
    the nodes on it, and the candidate with the smallest score is returned.

    Args:
        g: networkx graph whose nodes and edges carry a ``'weight'`` attribute.
        d: mapping with the keys ``'entry'`` (start node) and ``'exits'``
            (collection of candidate end nodes).

    Returns:
        A ``(total, path)`` tuple: the smallest summed node weight and the
        list of nodes of the path that achieves it.

    Raises:
        AssertionError: if ``d['exits']`` is empty or an exit cannot be
            reached from the entry. The graph's nodes are printed first.
    """
    # TODO: resume here; the entry and exit should not be picked by some
    # instruction, rather by the in/out degree of the nodes.
    entry = d['entry']

    if len(d['exits']) < 1:
        print('no exit occurss.....')
        for n in g.nodes:
            print(n)
        # Explicit raise: a bare ``assert`` disappears under ``python -O``.
        raise AssertionError("cannot find exit")

    best_weight = None
    best_path = None
    for exit_node in d['exits']:
        try:
            path = nx.shortest_path(g, entry, exit_node, weight='weight')
        except nx.exception.NetworkXNoPath:
            for n in g.nodes:
                print(n)
            raise AssertionError(f"no path from entry to exit {exit_node}")
        w = sum(g.nodes[n]['weight'] for n in path)
        if best_path is None or w < best_weight:
            best_weight = w
            best_path = path

    # Return the path that produced best_weight, not merely the last one
    # examined by the loop.
    return best_weight, best_path
def calculate_min_weight(g, cycles, weight='cost'):
    """Map every node that lies on a cycle to the cost of its cheapest cycle.

    The cost of a cycle is the sum of the ``weight`` attribute over its
    edges, including the edge that closes the cycle from the last node back
    to the first.

    Args:
        g: graph whose ``g.edges[u, v]`` yields the attribute dict of an edge.
        cycles: iterable of cycles, each a list of nodes in traversal order.
        weight: name of the edge attribute to accumulate.

    Returns:
        dict mapping each node appearing in some cycle to the smallest cost
        among the cycles that contain it.
    """
    cheapest = {}
    for loop in cycles:
        # Pair every node with its successor; the last wraps to the first.
        successors = loop[1:] + loop[:1]
        loop_cost = sum(
            g.edges[src, dst][weight] for src, dst in zip(loop, successors)
        )
        for node in loop:
            known = cheapest.get(node)
            if known is None or loop_cost < known:
                cheapest[node] = loop_cost
    return cheapest
The cost is defined to be the following: 36 | # pick any node in the loop, for that node, let PC travel from this node, until it loops back 37 | # the total cost is the cost for that loop 38 | self_costs = calculate_min_weight(g, cycles) 39 | 40 | # traverse the graph 41 | entry = get_entry(g) 42 | red_nodes = list(nx.dfs_preorder_nodes(g, entry)) 43 | 44 | # if the cost of a loop is smaller than the blackout window 45 | # color every node in the loop white 46 | for n in red_nodes: 47 | if n in self_costs and self_costs[n] <= blackout_window: 48 | g.nodes[n]['is_ms'] = False 49 | red_nodes = [n for n in red_nodes if g.nodes[n]['is_ms']] 50 | 51 | # the following are by and large follow the paper 52 | while(red_nodes): 53 | red_n = red_nodes.pop(0) 54 | distances = nx.single_source_dijkstra_path_length(g, red_n, weight = 'cost') 55 | for n in red_nodes: 56 | if n in distances and distances[n] <= blackout_window: 57 | g.nodes[n]['is_ms'] = False 58 | red_nodes = [n for n in red_nodes if g.nodes[n]['is_ms']] 59 | 60 | return g 61 | 62 | 63 | 64 | 65 | 66 | 67 | -------------------------------------------------------------------------------- /paper_imp/tracee/main/pmcc.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "zcu_cs.h" 5 | #include "cs_pmu.h" 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | 14 | #define BUF_SIZE (2048*1024) 15 | #define ITER 50 16 | 17 | int buf[BUF_SIZE]; 18 | 19 | int write_phase() { 20 | int i; 21 | for(i=0; icc; 58 | ct++; 59 | } 60 | 61 | 62 | 63 | int main() { 64 | 65 | // set process to FIFO 66 | struct sched_param sp = { .sched_priority = 99 }; 67 | int retno; 68 | retno = sched_setscheduler(0, SCHED_FIFO, &sp); 69 | if(retno == -1) { 70 | perror("sched set failed\n"); 71 | return 1; 72 | } 73 | 74 | // get linux time resolution 75 | clock_getres(CLOCK_REALTIME, &ts); 76 | printf("clock_rt resolution (nanosec) 
/*
 * Self-hosted trace demo: trace one run of ./hello_ETM with ETM 0.
 *
 * The parent disables cpuidle, pins itself to core 3, puts TMC1 into
 * software FIFO mode and initialises the ETM. It then forks a child which
 * pins itself to core 0, restricts tracing to its own pid and to the
 * program-counter range 0x400000-0x500000, calls spawn_child(poller),
 * enables the ETM and exec()s the target. The parent waits for the child
 * and disables the ETM.
 *
 * argc/argv are not used.
 * Returns 0 on success and 1 if fork() fails; the child exits with 1 if
 * execl() returns.
 */
int main(int argc, char *argv[])
{
    printf("Vanilla ZCU102 self-host trace demo.\n");
    printf("Build: on %s at %s\n\n", __DATE__, __TIME__);

    pid_t target_pid;

    // Disable all cpuidle states. Accessing the ETM of an idled core will cause a hang.
    linux_disable_cpuidle();

    // Pin to the 4th core, because we will use the 1st core to run the target application.
    pin_to_core(3);

    // configure TMC1 to be in Software FIFO mode
    cs_config_tmc1_softfifo();

    // initialize ETM
    config_etm_n(etms[0], 0, 1);

    // fork a child to execute the target application
    // (the loop body runs exactly once, with i == 0)
    for (int i = 0; i < 1; i++)
    {
        target_pid = fork();
        if (target_pid == 0)
        {
            // child: run on core i
            pin_to_core(i);
            uint64_t child_pid = (uint64_t) getpid();

            // Further configure the ETM so that it only traces the process with
            // pid == child_pid/target_pid, with the program counter in the range
            // of 0x400000 to 0x500000.
            etm_set_contextid_cmp(etms[0], child_pid);
            etm_register_range(etms[0], 0x400000, 0x500000, 1);

            // start the poller before tracing begins
            // (presumably the process that reads and prints the trace data -- see below)
            spawn_child(poller);

            // Enable ETM, start trace session
            etm_enable(etms[0]);

            // execute target application; execl only returns on failure
            execl("./hello_ETM", "hello_ETM", NULL);
            perror("execl failed. Target application failed to start.");
            exit(1);
        }
        else if (target_pid < 0)
        {
            // fork failed in the parent: nothing was started, give up
            perror("fork");
            return 1;
        }
    }

    // wait for target application to finish
    int status;
    waitpid(target_pid, &status, 0);

    // Disable ETM, our trace session is done. Poller will print trace data.
    etm_disable(etms[0]);

    return 0;
}
31 | pin_to_core(3); 32 | 33 | // configure TMC1 to be in Software FIFO mode 34 | cs_config_tmc1_softfifo(); 35 | 36 | // enable PMU architectural event export 37 | config_pmu_enable_export(); 38 | 39 | // initialize ETM 40 | config_etm_n(etms[0], 0, 1); 41 | 42 | // fork a child to execute the target application 43 | for (int i = 0; i < 1; i++) 44 | { 45 | target_pid = fork(); 46 | if (target_pid == 0) 47 | { 48 | pin_to_core(i); 49 | uint64_t child_pid = (uint64_t) getpid(); 50 | 51 | // further configure ETM. So that it will only trace the process with pid == child_pid/target_pid 52 | // with the program counter in the range of 0x400000 to 0x500000 53 | etm_set_contextid_cmp(etms[0], child_pid); 54 | etm_register_range(etms[0], 0x400000, 0x500000, 1); 55 | 56 | // When L2 cache miss happens, PMU send an input to ETM, ETM then generates an Event trace packet. 57 | etm_register_pmu_event(etms[0], L2D_CACHE_REFILL_T); 58 | 59 | // add a child process to poll RRD to read trace data 60 | spawn_child(poller); 61 | 62 | // Enable ETM, start trace session 63 | etm_enable(etms[0]); 64 | 65 | // execute target application 66 | execl("./hello_ETM", "hello_ETM", NULL); 67 | perror("execl failed. Target application failed to start."); 68 | exit(1); 69 | } 70 | else if (target_pid < 0) 71 | { 72 | perror("fork"); 73 | return 1; 74 | } 75 | } 76 | 77 | // wait for target application to finish 78 | int status; 79 | waitpid(target_pid, &status, 0); 80 | 81 | // Disable ETM, our trace session is done. Poller will print trace data. 
def to_graph(self, bbs):
    """Build the control-flow graph of one routine from its linked basic blocks.

    Args:
        bbs: Linked basic blocks of the routine; the first one is assumed to
            be the entry.  The mnemonic of each block's last instruction
            (``content[-1].ins``) decides which successors get an edge.

    Returns:
        A ``networkx.DiGraph`` whose nodes are the basic-block objects.

    Raises:
        AssertionError: if a block ends with a mnemonic that is neither a
            call (``bl``/``blr``), a member of ``CB_INS`` or ``UB_INS``,
            nor ``ret``/``br``.
    """
    ng = nx.DiGraph()
    if len(bbs) == 1:
        # A lone block contributes no edge, so register it explicitly;
        # otherwise the resulting graph would be empty.
        ng.add_node(bbs[0])
        if bbs[0].content[-1].ins in ('ret', 'br'):
            return ng
        print(f'{Fore.YELLOW}Special ending routine {bbs[0]}, forming a none-return routine.{Style.RESET_ALL}')
    for b in bbs:
        end_mnemonic = b.content[-1].ins
        if end_mnemonic in ('bl', 'blr'):
            try:
                ng.add_edge(b, b.out_tunnel)
            except ValueError:
                # The edge could not be created; show the offending call
                # line.  (The original printed the bound method object
                # because ``strip`` was never called.)
                print(f'{b.content[-1].raw_line.strip()}')
        elif end_mnemonic in CB_INS:
            # presumably conditional branches: taken + fall-through successor
            ng.add_edge(b, b.e_succ_bb)
            ng.add_edge(b, b.n_succ_bb)
        elif end_mnemonic in UB_INS:
            # presumably unconditional branches: single successor
            ng.add_edge(b, b.e_succ_bb)
        elif end_mnemonic in ('ret', 'br'):
            # Routine exit: no outgoing edge.  ('blr' is already handled by
            # the call branch above, so it is not repeated here.)
            continue
        else:
            print(end_mnemonic)
            # Raised explicitly so the check survives ``python -O``.
            raise AssertionError(
                f'unhandled block-ending mnemonic: {end_mnemonic}')
    return ng


def _record_graph_with_nodes(ng):
    """Return a directed pgv AGraph holding one record-shaped node per node of *ng*."""
    g = pgv.AGraph(strict=False, directed=True)
    g.node_attr['style'] = 'filled'
    g.node_attr['shape'] = 'record'
    for n in ng.nodes:
        g.add_node(n, label=n.content_repr())
    return g


def nxg2pgv(self, ng):
    """Convert a networkx DiGraph into a pygraphviz AGraph.

    Every node is labelled with its ``content_repr()``; edges are copied
    without attributes.
    """
    g = _record_graph_with_nodes(ng)
    for e in ng.edges:
        g.add_edge(*e)
    return g


def nxg2pgv_msg(self, ng):
    """Convert a networkx DiGraph into a pygraphviz AGraph.

    Kept as a separate entry point for existing callers; the conversion is
    identical to :func:`nxg2pgv`.
    """
    return nxg2pgv(self, ng)


def nxg2pgv_flow(self, ng, label=None):
    """Convert a networkx DiGraph into a pygraphviz AGraph with edge labels.

    Args:
        ng: Source graph; nodes must provide ``content_repr()``.
        label: ``'full'`` labels each edge with ``str()`` of its whole
            attribute dict; any other non-``None`` value is used as the key
            of the edge attribute to show; ``None`` (the default) leaves the
            edges unlabelled.

    Returns:
        The populated ``pgv.AGraph``.

    Raises:
        KeyError: if *label* names an attribute that an edge does not carry.
    """
    g = _record_graph_with_nodes(ng)
    for e in ng.edges:
        if label is None:
            # The default used to be looked up as the attribute key ``None``
            # and failed with KeyError; draw the edge without a label.
            g.add_edge(*e)
            continue
        if label == 'full':
            text = str(ng.edges[e])
        else:
            text = ng.edges[e][label]
        g.add_edge(*e, label=text)
    return g


def visual_pgv(self, rt_stat, fname):
    """Write the routine's pgv graph to ``<fname>/<routine>.dot``.

    Args:
        rt_stat: Mapping with the routine name under ``'routine'`` and the
            graph object (anything with ``write(path)``) under ``'pgv'``.
        fname: Output directory, despite the name.
    """
    rt_name = rt_stat['routine']
    vg = rt_stat['pgv']
    vg.write(f'{fname}/{rt_name}.dot')
#define ExactMatch1 0b10010001 39 | #define ExactMatch2 0b10010010 40 | #define Exce 0b00000110 41 | #define ExceReturn 0b00000111 42 | #define Context0 0b10000000 43 | #define Context1 0b10000001 44 | #define FunctionReturn 0b00000101 45 | #define TraceOn 0b00000100 46 | #define Resync 0b00001000 47 | #define CCF10 0b00001111 48 | #define CCF11 0b00001110 49 | #define CCF20 0b00001101 50 | #define CCF21 0b00001100 51 | 52 | #define CC_THRESHOLD 4 53 | 54 | typedef struct address_reg { 55 | uint64_t address; 56 | uint8_t is; 57 | } address_reg_t; 58 | 59 | void trace_loop(void); 60 | 61 | void handle_async(void); 62 | void handle_resync(void); 63 | void handle_traceinfo(void); 64 | void handle_longaddress(uint8_t); 65 | void handle_shortaddress(uint8_t); 66 | void handle_exactmatch(uint8_t); 67 | void handle_addrwithcontext(uint8_t); 68 | void handle_context(uint8_t); 69 | void handle_timestamp(uint8_t); 70 | void handle_atom1(uint8_t); 71 | void handle_atom2(uint8_t); 72 | void handle_atom3(uint8_t); 73 | void handle_atom4(uint8_t); 74 | void handle_atom5(uint8_t); 75 | void handle_atom6(uint8_t); 76 | void handle_event(uint8_t); 77 | void handle_exception(void); 78 | void handle_exceptionreturn(void); 79 | void handle_functionreturn(void); 80 | void handle_traceon(void); 81 | void handle_ccf1(uint8_t); 82 | void handle_ccf2(uint8_t); 83 | void handle_ccf3(uint8_t); 84 | 85 | #endif // TRACE_H_ 86 | -------------------------------------------------------------------------------- /paper_imp/trc_parser_offline/headers/trace.h: -------------------------------------------------------------------------------- 1 | #ifndef TRACE_H_ 2 | #define TRACE_H_ 3 | 4 | #include 5 | #include 6 | 7 | #include "handlers.h" 8 | 9 | #define Async 0b00000000 10 | #define TraceInfo 0b00000001 11 | #define LongAddress0 0b10011101 12 | #define LongAddress1 0b10011110 13 | #define LongAddress2 0b10011011 14 | #define LongAddress3 0b10011010 15 | #define ShortAddr0 0b10010110 16 | #define 
ShortAddr1 0b10010101 17 | #define AddrWithContext0 0b10000010 18 | #define AddrWithContext1 0b10000011 19 | #define AddrWithContext2 0b10000101 20 | #define AddrWithContext3 0b10000110 21 | #define TimeStamp0 0b00000010 22 | #define TimeStamp1 0b00000011 23 | #define Atom10 0b11110111 24 | #define Atom11 0b11110110 25 | #define Atom20 0b11011000 26 | #define Atom21 0b11011001 27 | #define Atom22 0b11011010 28 | #define Atom23 0b11011011 29 | #define Atom40 0b11011100 30 | #define Atom41 0b11011101 31 | #define Atom42 0b11011110 32 | #define Atom43 0b11011111 33 | #define Atom50 0b11010111 34 | #define Atom51 0b11010110 35 | #define Atom52 0b11010101 36 | #define Atom53 0b11110101 37 | #define ExactMatch0 0b10010000 38 | #define ExactMatch1 0b10010001 39 | #define ExactMatch2 0b10010010 40 | #define Exce 0b00000110 41 | #define ExceReturn 0b00000111 42 | #define Context0 0b10000000 43 | #define Context1 0b10000001 44 | #define FunctionReturn 0b00000101 45 | #define TraceOn 0b00000100 46 | #define Resync 0b00001000 47 | #define CCF10 0b00001111 48 | #define CCF11 0b00001110 49 | #define CCF20 0b00001101 50 | #define CCF21 0b00001100 51 | 52 | #define CC_THRESHOLD 4 53 | 54 | typedef struct address_reg { 55 | uint64_t address; 56 | uint8_t is; 57 | } address_reg_t; 58 | 59 | void trace_loop(void); 60 | 61 | void handle_async(void); 62 | void handle_resync(void); 63 | void handle_traceinfo(void); 64 | void handle_longaddress(uint8_t); 65 | void handle_shortaddress(uint8_t); 66 | void handle_exactmatch(uint8_t); 67 | void handle_addrwithcontext(uint8_t); 68 | void handle_context(uint8_t); 69 | void handle_timestamp(uint8_t); 70 | void handle_atom1(uint8_t); 71 | void handle_atom2(uint8_t); 72 | void handle_atom3(uint8_t); 73 | void handle_atom4(uint8_t); 74 | void handle_atom5(uint8_t); 75 | void handle_atom6(uint8_t); 76 | void handle_event(uint8_t); 77 | void handle_exception(void); 78 | void handle_exceptionreturn(void); 79 | void handle_functionreturn(void); 80 | 
void handle_traceon(void); 81 | void handle_ccf1(uint8_t); 82 | void handle_ccf2(uint8_t); 83 | void handle_ccf3(uint8_t); 84 | 85 | #endif // TRACE_H_ 86 | -------------------------------------------------------------------------------- /paper_imp/tracee/src/argparse.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | void parse_args(int argc, char *argv[], char *app, char **app_farg, char *milestone_path, ms_t* ms_mode, uint64_t* start_addr, uint64_t* end_addr) { 8 | int opt; 9 | while ((opt = getopt(argc, argv, ":a:m:g:b:e:")) != -1) { 10 | switch (opt) { 11 | case 'a': 12 | printf("First arg: %s\n", optarg); 13 | *app_farg = (char *) malloc(256); 14 | strcpy(*app_farg, optarg); 15 | break; 16 | case 'm': 17 | case 'g': 18 | strcpy(milestone_path, optarg); 19 | printf("milestone path %s\n", milestone_path); 20 | if (opt == 'm') { 21 | *ms_mode = SEQUENCE; 22 | } else if (opt == 'g') { 23 | *ms_mode = GRAPH; 24 | } 25 | break; 26 | case 'b': 27 | *start_addr = strtol(optarg, NULL, 0); 28 | break; 29 | case 'e': 30 | *end_addr = strtol(optarg, NULL, 0); 31 | break; 32 | default: 33 | break; 34 | } 35 | } 36 | 37 | if(optind +1 == argc) { 38 | strcpy(app, argv[optind]); 39 | printf("application: %s\n", app); 40 | } else { 41 | printf("Input application required!\n"); 42 | exit(1); 43 | } 44 | } 45 | 46 | void parse_args_mp(int argc, char *argv[], char *app, char **app_farg, 47 | char *milestone_path, ms_t* ms_mode, uint64_t* start_addr, uint64_t* end_addr, 48 | uint64_t* range_u, uint64_t* range_l, uint8_t* n_mp) { 49 | int opt; 50 | while ((opt = getopt(argc, argv, ":a:m:g:b:e:u:l:n:")) != -1) { 51 | switch (opt) { 52 | case 'a': 53 | printf("First arg: %s\n", optarg); 54 | *app_farg = (char *) malloc(256); 55 | strcpy(*app_farg, optarg); 56 | break; 57 | case 'm': 58 | case 'g': 59 | strcpy(milestone_path, optarg); 60 | printf("milestone path %s\n", 
milestone_path); 61 | if (opt == 'm') { 62 | *ms_mode = SEQUENCE; 63 | } else if (opt == 'g') { 64 | *ms_mode = GRAPH; 65 | } 66 | break; 67 | case 'b': 68 | *start_addr = strtol(optarg, NULL, 0); 69 | break; 70 | case 'e': 71 | *end_addr = strtol(optarg, NULL, 0); 72 | break; 73 | case 'u': 74 | *range_u = strtol(optarg, NULL, 0); 75 | break; 76 | case 'l': 77 | *range_l = strtol(optarg, NULL, 0); 78 | break; 79 | case 'n': 80 | *n_mp = strtol(optarg, NULL, 0); 81 | break; 82 | default: 83 | break; 84 | } 85 | } 86 | 87 | if(optind +1 == argc) { 88 | strcpy(app, argv[optind]); 89 | printf("application: %s\n", app); 90 | } else { 91 | printf("Input application required!\n"); 92 | exit(1); 93 | } 94 | } -------------------------------------------------------------------------------- /paper_imp/tracer/src/trace.h: -------------------------------------------------------------------------------- 1 | #ifndef TRACE_H_ 2 | #define TRACE_H_ 3 | 4 | #include "handlers.h" 5 | 6 | #define Async 0b00000000 7 | #define TraceInfo 0b00000001 8 | #define LongAddress0 0b10011101 9 | #define LongAddress1 0b10011110 10 | #define LongAddress2 0b10011011 11 | #define LongAddress3 0b10011010 12 | #define ShortAddr0 0b10010110 13 | #define ShortAddr1 0b10010101 14 | #define AddrWithContext0 0b10000010 15 | #define AddrWithContext1 0b10000011 16 | #define AddrWithContext2 0b10000101 17 | #define AddrWithContext3 0b10000110 18 | #define TimeStamp0 0b00000010 19 | #define TimeStamp1 0b00000011 20 | #define Atom10 0b11110111 21 | #define Atom11 0b11110110 22 | #define Atom20 0b11011000 23 | #define Atom21 0b11011001 24 | #define Atom22 0b11011010 25 | #define Atom23 0b11011011 26 | #define Atom40 0b11011100 27 | #define Atom41 0b11011101 28 | #define Atom42 0b11011110 29 | #define Atom43 0b11011111 30 | #define Atom50 0b11010111 31 | #define Atom51 0b11010110 32 | #define Atom52 0b11010101 33 | #define Atom53 0b11110101 34 | #define ExactMatch0 0b10010000 35 | #define ExactMatch1 0b10010001 
36 | #define ExactMatch2 0b10010010 37 | #define Exce 0b00000110 38 | #define ExceReturn 0b00000111 39 | #define Context0 0b10000000 40 | #define Context1 0b10000001 41 | #define FunctionReturn 0b00000101 42 | #define TraceOn 0b00000100 43 | #define Resync 0b00001000 44 | #define CCF10 0b00001111 45 | #define CCF11 0b00001110 46 | #define CCF20 0b00001101 47 | #define CCF21 0b00001100 48 | 49 | #define CC_THRESHOLD 4 50 | 51 | typedef struct address_reg { 52 | uint64_t address; 53 | uint8_t is; 54 | } address_reg_t; 55 | 56 | void trace_loop(void); 57 | 58 | void handle_async(void); 59 | void handle_resync(void); 60 | void handle_traceinfo(void); 61 | void handle_longaddress(uint8_t); 62 | void handle_shortaddress(uint8_t); 63 | void handle_exactmatch(uint8_t); 64 | void handle_addrwithcontext(uint8_t); 65 | void handle_context(uint8_t); 66 | void handle_timestamp(uint8_t); 67 | void handle_atom1(uint8_t); 68 | void handle_atom2(uint8_t); 69 | void handle_atom3(uint8_t); 70 | void handle_atom4(uint8_t); 71 | void handle_atom5(uint8_t); 72 | void handle_atom6(uint8_t); 73 | void handle_event(uint8_t); 74 | void handle_exception(void); 75 | void handle_exceptionreturn(void); 76 | void handle_functionreturn(void); 77 | void handle_traceon(void); 78 | void handle_ccf1(uint8_t); 79 | void handle_ccf2(uint8_t); 80 | void handle_ccf3(uint8_t); 81 | 82 | //void register_timing(void); 83 | static inline void register_timing(); 84 | 85 | #endif // TRACE_H_ 86 | -------------------------------------------------------------------------------- /paper_imp/tracer/src/etm.h: -------------------------------------------------------------------------------- 1 | #ifndef ETM_H 2 | #define ETM_H 3 | 4 | #include "zcu_cs.h" 5 | #include "xil_printf.h" 6 | #include "xtime_l.h" 7 | 8 | #define TRCPRGCTLR 0x004 9 | #define TRCSTATR 0x00c 10 | 11 | #define TRCLAR 0xfb0 12 | #define TRCLSR 0xfb4 13 | #define TRCOSLAR 0x300 14 | #define TRCOSLSR 0x304 15 | 16 | #define TRCCONFIGR 0x010 17 | 
#define TRCEVENTCTL0R 0x020 18 | #define TRCEVENTCTL1R 0x024 19 | #define TRCSTALLCTLR 0x02c 20 | #define TRCSYNCPR 0x034 21 | #define TRCTRACEIDR 0x040 22 | #define TRCTSCTLR 0x030 23 | #define TRCVICTLR 0x080 24 | #define TRCVIIECTLR 0x084 25 | #define TRCVISSCTLR 0x088 26 | #define TRCCCCTLR 0x038 27 | #define TRCEXTINSELR 0x120 28 | #define TRCRSCTLR0 0x200 29 | #define TRCRSCTLR1 0x204 30 | #define TRCRSCTLR2 0x208 31 | #define TRCRSCTLR3 0x20c 32 | #define TRCRSCTLR4 0x210 33 | #define TRCRSCTLR5 0x214 34 | #define TRCRSCTLR6 0x218 35 | #define TRCRSCTLR7 0x21c 36 | #define TRCRSCTLR8 0x220 37 | #define TRCRSCTLR9 0x224 38 | #define TRCRSCTLR10 0x228 39 | #define TRCRSCTLR11 0x22c 40 | #define TRCRSCTLR12 0x230 41 | #define TRCRSCTLR13 0x234 42 | #define TRCRSCTLR14 0x238 43 | #define TRCRSCTLR15 0x23c 44 | 45 | // Address Comparator Value Register 46 | #define TRCACVR0 0x400 47 | #define TRCACVR1 0x408 48 | #define TRCACVR2 0x410 49 | #define TRCACVR3 0x418 50 | #define TRCACVR4 0x420 51 | #define TRCACVR5 0x428 52 | #define TRCACVR6 0x430 53 | #define TRCACVR7 0x438 54 | #define TRCACVR8 0x440 55 | #define TRCACVR9 0x448 56 | #define TRCACVR10 0x450 57 | #define TRCACVR11 0x458 58 | #define TRCACVR12 0x460 59 | #define TRCACVR13 0x468 60 | #define TRCACVR14 0x470 61 | #define TRCACVR15 0x478 62 | 63 | // Address Comparator Access Type Register 64 | #define TRCACATR0 0x480 65 | #define TRCACATR1 0x488 66 | #define TRCACATR2 0x490 67 | #define TRCACATR3 0x498 68 | #define TRCACATR4 0x4a0 69 | #define TRCACATR5 0x4a8 70 | #define TRCACATR6 0x4b0 71 | #define TRCACATR7 0x4b8 72 | #define TRCACATR8 0x4c0 73 | #define TRCACATR9 0x4c8 74 | #define TRCACATR10 0x4d0 75 | #define TRCACATR11 0x4d8 76 | #define TRCACATR12 0x4e0 77 | #define TRCACATR13 0x4e8 78 | #define TRCACATR14 0x4f0 79 | #define TRCACATR15 0x4f8 80 | 81 | // Context ID comparator, Cortex-A53 has only one 82 | #define TRCCIDCVRD0 0x600 83 | #define TRCCIDCCTLR 0x680 84 | 85 | #define MSG_BUFFER_SIZE
(1024*2) 86 | #define MS_LOG_SIZE (1500) 87 | #define ETR_BUFFER_SIZE (1024*8) 88 | 89 | typedef struct milestone_relay { 90 | uint8_t n_valid; 91 | uint32_t address[4]; 92 | uint32_t offset[4]; 93 | uint32_t nominal_t[4]; 94 | uint32_t tail_t[4]; 95 | } milestone_relay; 96 | 97 | extern volatile uint32_t * milestones; 98 | extern volatile uint32_t *milestone_type; 99 | extern uint32_t milestones_size; 100 | extern uint32_t current_milestone; 101 | extern uint32_t current_timestamp; 102 | 103 | void etm_disable(); 104 | void etm_enable(); 105 | void etr_disable(); 106 | void etr_enable(); 107 | void etr_man_flush(); 108 | 109 | void update_graph_milestone(uint32_t address); 110 | 111 | #endif 112 | -------------------------------------------------------------------------------- /csc/main/start_etr.c: -------------------------------------------------------------------------------- 1 | /* 2 | Brief: This is a simple demo to show how to use ETM to trace a target application. 3 | 4 | This demo should run on ZCU102/Kria board as long as the APU has linux running. 5 | Contrary to the original paper, this demo does not need RPU. 6 | 7 | The purpose of this demo is to provide a template for researchers who want to use the CoreSight debug infrastructure. 8 | 9 | This demo illustrates how to use ETR to route trace data to any memory mapped address. 10 | 11 | Author: Weifan Chen 12 | Date: 2024-08-17 13 | */ 14 | 15 | #define _GNU_SOURCE 16 | #include 17 | #include 18 | #include "common.h" 19 | #include "pmu_event.h" 20 | #include "cs_etm.h" 21 | #include "cs_config.h" 22 | #include "cs_soc.h" 23 | 24 | extern volatile ETM_interface *etms[4]; 25 | extern volatile TMC_interface *tmc3; 26 | 27 | int main(int argc, char *argv[]) 28 | { 29 | printf("Vanilla ZCU102 self-host trace demo.\n"); 30 | printf("Build: on %s at %s\n\n", __DATE__, __TIME__); 31 | 32 | pid_t target_pid; 33 | 34 | // Disabling all cpuidle. Access the ETM of an idled core will cause a hang. 
35 | linux_disable_cpuidle(); 36 | 37 | // Pin to the 4-th core, because we will use 1st core to run the target application. 38 | pin_to_core(3); 39 | 40 | // configure CoreSight to use ETR; The addr and size is the On-Chip memory (OCM) on chip. 41 | // You can change the addr and size to use any other 42 | // uint64_t buf_addr = 0x00FFE00000; // RPU 0 ATCM 43 | // uint32_t buf_size = 1024 * 64; 44 | uint64_t buf_addr = 0x00FFFC0000; //OCM 45 | uint32_t buf_size = 1024 * 256; 46 | 47 | cs_config_etr_mp(buf_addr, buf_size); 48 | 49 | // prepare the trace data buffer 50 | clear_buffer(buf_addr, buf_size); 51 | 52 | // initialize ETM 53 | config_etm_n(etms[0], 0, 1); 54 | 55 | // fork a child to execute the target application 56 | for (int i = 0; i < 1; i++) 57 | { 58 | target_pid = fork(); 59 | if (target_pid == 0) 60 | { 61 | pin_to_core(i); 62 | uint64_t child_pid = (uint64_t) getpid(); 63 | 64 | // further configure ETM. So that it will only trace the process with pid == child_pid/target_pid 65 | // with the program counter in the range of 0x400000 to 0x500000 66 | etm_set_contextid_cmp(etms[0], child_pid); 67 | etm_register_range(etms[0], 0x400000, 0x500000, 1); 68 | 69 | // Enable ETM, start trace session 70 | etm_enable(etms[0]); 71 | 72 | // execute target application 73 | execl("./hello_ETM", "hello_ETM", NULL); 74 | perror("execl failed. 
Target application failed to start."); 75 | exit(1); 76 | } 77 | else if (target_pid < 0) 78 | { 79 | perror("fork"); 80 | return 1; 81 | } 82 | } 83 | 84 | // wait for target application to finish 85 | int status; 86 | waitpid(target_pid, &status, 0); 87 | 88 | // Disable ETM, our trace session is done 89 | etm_disable(etms[0]); 90 | 91 | munmap((void *)etms[0], sizeof(ETM_interface)); 92 | 93 | // drain the TMC3 (ETR) and write the trace data to files 94 | tmc_drain_data(tmc3); 95 | 96 | return 0; 97 | } 98 | -------------------------------------------------------------------------------- /paper_imp/trc_parser_offline/src/ctrace.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "trace.h" 8 | 9 | extern void trace_loop(void); 10 | 11 | static uint8_t * trace_buffer; 12 | static uint32_t buffer_size; 13 | static uint32_t buffer_pointer; 14 | uint8_t strip = 0; 15 | FILE *fstrip; 16 | 17 | uint32_t read_data(uint8_t* buffer, uint32_t bytes, uint8_t advance_pointer) { 18 | uint32_t read; 19 | 20 | for (read = 0; read < bytes && buffer_pointer + read < buffer_size; ++read) { 21 | buffer[read] = trace_buffer[buffer_pointer + read]; 22 | } 23 | 24 | if (advance_pointer) { 25 | buffer_pointer += read; 26 | } 27 | 28 | return read; 29 | } 30 | 31 | uint32_t advance_pointer(uint32_t offset) { 32 | buffer_pointer += offset; 33 | if (buffer_pointer >= buffer_size) 34 | buffer_pointer = buffer_size; 35 | 36 | return buffer_pointer; 37 | } 38 | 39 | uint8_t data_available() { 40 | return buffer_pointer < buffer_size; 41 | } 42 | 43 | int main(int argc, char const *argv[]) { 44 | int ctl_flow_fd; 45 | struct stat ctl_flow_stat; 46 | FILE * trace_file; 47 | char * line; 48 | unsigned int line_hex; 49 | void * ctl_ptr; 50 | size_t len = 0; 51 | ssize_t read; 52 | 53 | if (argc < 2) { 54 | fprintf(stderr, "Usage: ./ctrace [trace_input_file] [ctl_binary]\n"); 
55 | exit(EXIT_FAILURE); 56 | } 57 | 58 | trace_buffer = (uint8_t *) malloc(256 * 1024 * 1024); // 64 mb?? 59 | buffer_size = 0; 60 | 61 | trace_file = fopen(argv[1], "r"); 62 | if (trace_file == NULL) { 63 | fprintf(stderr, "Error opening input file %s\n", argv[1]); 64 | exit(EXIT_FAILURE); 65 | } 66 | 67 | while ((read = getline(&line, &len, trace_file)) != -1) { 68 | sscanf(line, "%x", &line_hex); 69 | trace_buffer[buffer_size++] = line_hex & 0xFF; 70 | trace_buffer[buffer_size++] = (line_hex >> 8) & 0xFF; 71 | trace_buffer[buffer_size++] = (line_hex >> 16) & 0xFF; 72 | trace_buffer[buffer_size++] = (line_hex >> 24) & 0xFF; 73 | } 74 | 75 | fclose(trace_file); 76 | 77 | if (argc == 3 && strcmp("strip", argv[2])) { 78 | ctl_flow_fd = open(argv[2], O_RDONLY); 79 | if (ctl_flow_fd < 0) { 80 | fprintf(stderr, "Error binary ctl file %s\n", argv[2]); 81 | exit(EXIT_FAILURE); 82 | } 83 | 84 | if (fstat(ctl_flow_fd, &ctl_flow_stat) < 0) { 85 | fprintf(stderr, "Error getting stats for ctl file %s\n", argv[2]); 86 | exit(EXIT_FAILURE); 87 | } 88 | 89 | ctl_ptr = mmap(0, ctl_flow_stat.st_size, PROT_READ, MAP_PRIVATE, ctl_flow_fd, 0); 90 | if (ctl_ptr == MAP_FAILED) { 91 | fprintf(stderr, "Error mmap of ctl file %s\n", argv[2]); 92 | exit(EXIT_FAILURE); 93 | } 94 | 95 | set_ctl_buff(ctl_ptr, ctl_flow_stat.st_size / sizeof(basicblock_t)); 96 | } 97 | 98 | if (argc == 3 && !strcmp("strip", argv[2])) { 99 | fstrip = fopen("./strip.txt", "w"); 100 | strip = 1; 101 | } 102 | 103 | printf("Done reading file, read %d bytes (should be %d lines)\n", buffer_size, buffer_size / 4); 104 | 105 | trace_loop(); 106 | 107 | return 0; 108 | } 109 | -------------------------------------------------------------------------------- /csc/main/start_sram.c: -------------------------------------------------------------------------------- 1 | /* 2 | Brief: This is a simple demo to show how to use ETM to trace a target application. 
3 | 4 | This demo should run on ZCU102/Kria board as long as the APU has linux running. 5 | Contrary to the original paper, this demo does not need RPU. 6 | 7 | The purpose of this demo is to provide a template for researchers who want to use the CoreSight debug infrastructure. 8 | 9 | This demo illustrates how to use TMC2 to store trace data to SRAM. 10 | 11 | Author: Weifan Chen 12 | Date: 2024-08-17 13 | */ 14 | 15 | #define _GNU_SOURCE 16 | #include 17 | #include 18 | #include "common.h" 19 | #include "pmu_event.h" 20 | #include "cs_etm.h" 21 | #include "cs_config.h" 22 | #include "cs_soc.h" 23 | 24 | extern volatile ETM_interface *etms[4]; 25 | extern volatile TMC_interface *tmc2; 26 | 27 | int sram_is_empty() { 28 | return (tmc2->status & (0x1 << 4)) >> 4; 29 | } 30 | 31 | int tmc2_is_ready() { 32 | return (tmc2->status & (0x1 << 2)) >> 2; 33 | } 34 | 35 | int all_read() { 36 | return tmc2->ram_read_pt == tmc2->ram_write_pt; 37 | } 38 | 39 | void read_trace_data_from_SRAM() 40 | { 41 | printf("\nDumping trace data from SRAM\n"); 42 | tmc_disable(tmc2); 43 | while(!tmc2_is_ready()); 44 | while(!all_read()) { 45 | uint32_t data = tmc2->ram_read_data; 46 | if (data != 0xffffffff) { 47 | printf("0x%08x\n", data); 48 | } 49 | } 50 | } 51 | 52 | int main(int argc, char *argv[]) 53 | { 54 | printf("Vanilla ZCU102 self-host trace demo.\n"); 55 | printf("Build: on %s at %s\n\n", __DATE__, __TIME__); 56 | 57 | pid_t target_pid; 58 | 59 | // Disabling all cpuidle. Access the ETM of an idled core will cause a hang. 60 | linux_disable_cpuidle(); 61 | 62 | // Pin to the 4-th core, because we will use 1st core to run the target application. 
63 | pin_to_core(3); 64 | 65 | cs_config_SRAM(); 66 | 67 | // initialize ETM 68 | config_etm_n(etms[0], 0, 1); 69 | 70 | // fork a child to execute the target application 71 | for (int i = 0; i < 1; i++) 72 | { 73 | target_pid = fork(); 74 | if (target_pid == 0) 75 | { 76 | pin_to_core(i); 77 | uint64_t child_pid = (uint64_t) getpid(); 78 | 79 | // further configure ETM. So that it will only trace the process with pid == child_pid/target_pid 80 | // with the program counter in the range of 0x400000 to 0x500000 81 | etm_set_contextid_cmp(etms[0], child_pid); 82 | etm_register_range(etms[0], 0x400000, 0x500000, 1); 83 | 84 | // Enable ETM, start trace session 85 | etm_enable(etms[0]); 86 | 87 | // execute target application 88 | execl("./hello_ETM", "hello_ETM", NULL); 89 | perror("execl failed. Target application failed to start."); 90 | exit(1); 91 | } 92 | else if (target_pid < 0) 93 | { 94 | perror("fork"); 95 | return 1; 96 | } 97 | } 98 | 99 | // wait for target application to finish 100 | int status; 101 | waitpid(target_pid, &status, 0); 102 | 103 | // Disable ETM, our trace session is done. Poller will print trace data. 
104 | etm_disable(etms[0]); 105 | 106 | read_trace_data_from_SRAM(); 107 | 108 | return 0; 109 | } 110 | -------------------------------------------------------------------------------- /paper_imp/tracee/src/zcu_cs.c: -------------------------------------------------------------------------------- 1 | #include "zcu_cs.h" 2 | #include "cs_soc.h" 3 | #include "cs_etm.h" 4 | #include "cs_pmu.h" 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | 14 | void* cs_register(enum component comp) 15 | { 16 | void* ptr = NULL; 17 | int fd = open("/dev/mem", O_RDWR | O_SYNC); 18 | if (fd < 0) { 19 | perror("Cannot open /dev/mem\n"); 20 | exit(1); 21 | } 22 | 23 | switch(comp) { 24 | case Funnel0: 25 | ptr = mmap(NULL, sizeof(Funnel_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + FUNNEL0); 26 | break; 27 | case Funnel1: 28 | ptr = mmap(NULL, sizeof(Funnel_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + FUNNEL1); 29 | break; 30 | case Funnel2: 31 | ptr = mmap(NULL, sizeof(Funnel_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + FUNNEL2); 32 | break; 33 | case Tmc1: 34 | ptr = mmap(NULL, sizeof(TMC_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + TMC1); 35 | break; 36 | case Tmc2: 37 | ptr = mmap(NULL, sizeof(TMC_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + TMC2); 38 | break; 39 | case Tmc3: 40 | ptr = mmap(NULL, sizeof(TMC_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + TMC3); 41 | break; 42 | case Replic: 43 | ptr = mmap(NULL, sizeof(Replicator_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + REPLIC); 44 | break; 45 | case Tpiu: 46 | ptr = mmap(NULL, sizeof(TPIU_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + TPIU); 47 | break; 48 | case Cti0: 49 | ptr = mmap(NULL, sizeof(CTI_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + CTI0); 50 | break; 51 | case Cti1: 52 | ptr = mmap(NULL, sizeof(CTI_interface), 
PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + CTI1); 53 | break; 54 | case Cti2: 55 | ptr = mmap(NULL, sizeof(CTI_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + CTI2); 56 | break; 57 | case A53_0_etm: 58 | ptr = mmap(NULL, sizeof(ETM_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + A53_0_ETM); 59 | break; 60 | case A53_1_etm: 61 | ptr = mmap(NULL, sizeof(ETM_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + A53_1_ETM); 62 | break; 63 | case A53_2_etm: 64 | ptr = mmap(NULL, sizeof(ETM_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + A53_2_ETM); 65 | break; 66 | case A53_3_etm: 67 | ptr = mmap(NULL, sizeof(ETM_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + A53_3_ETM); 68 | break; 69 | case A53_0_pmu: 70 | ptr = mmap(NULL, sizeof(PMU_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + A53_0_PMU); 71 | break; 72 | default: 73 | fprintf(stderr, "Unimplemented component %d\n", comp); 74 | exit(1); 75 | break; 76 | } 77 | 78 | if (ptr == MAP_FAILED) 79 | fprintf(stderr,"mmap to component %d failed!\n", comp); 80 | close(fd); 81 | 82 | #ifdef DEBUG 83 | #endif 84 | 85 | return ptr; 86 | } -------------------------------------------------------------------------------- /paper_imp/demo/milestone_graphs/mg/sift.sift.tmsg.dot: -------------------------------------------------------------------------------- 1 | digraph "" { 2 | node [label="\N", 3 | shape=record, 4 | style=filled 5 | ]; 6 | "BB 0x404948 - 0x404984 type: bl" [label="*** Reason for MS *** 7 | \lEntry 8 | \l*** Block Info *** 9 | \lsift 404948 stp 10 | \lsift 40494c mov 11 | \lsift 404950 stp 12 | \lsift 404954 mov 13 | \lsift \ 14 | 404958 ldr 15 | \lsift 40495c stp 16 | \lsift 404960 ldr 17 | \lsift 404964 stp 18 | \lsift 404968 cmp 19 | \lsift 40496c str 20 | \lsift 404970 csel 21 | \lsift \ 22 | 404974 scvtf 23 | \lsift 404978 stp 24 | \lsift 40497c stp 25 | \lsift 404980 stp 26 | \lsift 404984 bl E:400a10 27 | \llog2f@plt"]; 28 
| "BB 0x4049c0 - 0x4049cc type: bl" [label="*** Reason for MS *** 29 | \l555973129 has exceeded thresh 10000 30 | \l*** Block Info *** 31 | \lsift 4049c0 str 32 | \lsift 4049c4 mov 33 | \lsift \ 34 | 4049c8 mov 35 | \lsift 4049cc bl E:405180 36 | \ldiffss"]; 37 | "BB 0x404948 - 0x404984 type: bl" -> "BB 0x4049c0 - 0x4049cc type: bl" [label="{}"]; 38 | "BB 0x4049d0 - 0x4049d8 type: b.le" [label="*** Reason for MS *** 39 | \l8917589 has exceeded thresh 10000 40 | \l*** Block Info *** 41 | \lsift 4049d0 mov 42 | \lsift 4049d4 cmp 43 | \lsift 4049d8 \ 44 | b.le E:404e40 "]; 45 | "BB 0x4049c0 - 0x4049cc type: bl" -> "BB 0x4049d0 - 0x4049d8 type: b.le" [label="{}"]; 46 | "BB 0x404afc - 0x404b20 type: bl" [label="*** Reason for MS *** 47 | \l47254 has exceeded thresh 10000 48 | \l*** Block Info *** 49 | \lsift 404afc fmov 50 | \lsift 404b00 ldr 51 | \lsift 404b04 \ 52 | mov 53 | \lsift 404b08 mov 54 | \lsift 404b0c str 55 | \lsift 404b10 mov 56 | \lsift 404b14 mov 57 | \lsift 404b18 mov 58 | \lsift 404b1c str 59 | \lsift \ 60 | 404b20 bl E:405b28 61 | \lsiftlocalmax"]; 62 | "BB 0x4049d0 - 0x4049d8 type: b.le" -> "BB 0x404afc - 0x404b20 type: bl" [label="{}"]; 63 | "BB 0x404b24 - 0x404b30 type: bl" [label="*** Reason for MS *** 64 | \l62840 has exceeded thresh 10000 65 | \l*** Block Info *** 66 | \lsift 404b24 mov 67 | \lsift 404b28 mov 68 | \lsift 404b2c \ 69 | str 70 | \lsift 404b30 bl E:403838 71 | \lfHorzcat"]; 72 | "BB 0x404afc - 0x404b20 type: bl" -> "BB 0x404b24 - 0x404b30 type: bl" [label="{}"]; 73 | "BB 0x404f04 - 0x404f0c type: b" [label="*** Reason for MS *** 74 | \l11902 has exceeded thresh 10000 75 | \l*** Block Info *** 76 | \lsift 404f04 str 77 | \lsift 404f08 str 78 | \lsift 404f0c \ 79 | b E:404d20 "]; 80 | "BB 0x404b24 - 0x404b30 type: bl" -> "BB 0x404f04 - 0x404f0c type: b" [label="{}"]; 81 | "BB 0x404ed4 - 0x404edc type: bl" [label="*** Reason for MS *** 82 | \l11905 has exceeded thresh 10000 83 | \l*** Block Info *** 84 | \lsift 404ed4 mov 85 | \lsift 
404ed8 ldr 86 | \lsift 404edc \ 87 | bl E:403768 88 | \lfFreeHandle"]; 89 | "BB 0x404b24 - 0x404b30 type: bl" -> "BB 0x404ed4 - 0x404edc type: bl" [label="{}"]; 90 | "BB 0x404eec - 0x404ef4 type: bl" [label="*** Reason for MS *** 91 | \l12180 has exceeded thresh 10000 92 | \l*** Block Info *** 93 | \lsift 404eec str 94 | \lsift 404ef0 mov 95 | \lsift 404ef4 \ 96 | bl E:403768 97 | \lfFreeHandle"]; 98 | "BB 0x404ed4 - 0x404edc type: bl" -> "BB 0x404eec - 0x404ef4 type: bl" [label="{}"]; 99 | "BB 0x404eec - 0x404ef4 type: bl" -> "BB 0x404afc - 0x404b20 type: bl" [label="{}"]; 100 | } 101 | -------------------------------------------------------------------------------- /paper_imp/tracee/main/start_mp.cpp: -------------------------------------------------------------------------------- 1 | #include "buffer.h" 2 | #include "cs_etm.h" 3 | #include "cs_config.h" 4 | #include "cs_soc.h" 5 | #include "pmu_event.h" 6 | #include "zcu_cs.h" 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | using namespace std; 26 | 27 | extern ETM_interface *etms[4]; 28 | extern TMC_interface *tmc3; 29 | 30 | int main(int argc, char *argv[]) 31 | { 32 | 33 | // set up ETR buffer. 
R5 refers to the RPU's TCM 34 | #ifdef R5 35 | uint64_t buf_addr = R5_0_ATCM + 0x8000; 36 | uint32_t buf_size = 8 * 1024 * 4; 37 | #else 38 | uint64_t buf_addr = 0xb0000000; 39 | uint32_t buf_size = 256 * 1024 * 1024; 40 | clear_buffer(buf_addr, buf_size); 41 | #endif 42 | 43 | char app[256]; 44 | char *app_farg = NULL; 45 | char milestone_path[256]; 46 | uint64_t start_addr=0; 47 | uint64_t end_addr=0; 48 | ms_t ms_mode; 49 | uint32_t *ms_ptr; 50 | uint32_t ms_size; 51 | cpu_set_t set; 52 | uint64_t range_u = 0; 53 | uint64_t range_l = 0; 54 | uint8_t n_mp = 0; 55 | 56 | parse_args_mp(argc, argv, app, &app_farg, milestone_path, &ms_mode, &start_addr, &end_addr, 57 | &range_u, &range_l, &n_mp); 58 | 59 | // pin the master thread to core3; the master core will not execute the target application 60 | printf("pin master to core3\n"); 61 | CPU_ZERO(&set); 62 | CPU_SET(3, &set); 63 | sched_setaffinity(0, sizeof(cpu_set_t), &set); 64 | sched_yield(); 65 | 66 | // configure the CoreSight infrastructure 67 | cs_config_etr_mp(buf_addr, buf_size); 68 | config_etm_n(etms[0],0,1); 69 | config_etm_n(etms[1],0,2); 70 | config_etm_n(etms[2],0,3); 71 | config_etm_n(etms[3],0,4); 72 | 73 | // fork n_mp children, each of which executes a target application 74 | // each child is pinned to a different core 75 | pid_t* pids = (pid_t*) malloc(sizeof(pid_t) * n_mp); 76 | int i; 77 | 78 | for (i = 0; i < n_mp; ++i) { 79 | if ((pids[i] = fork()) < 0) { 80 | perror("fork failed"); 81 | abort(); 82 | } else if (pids[i] == 0) { 83 | CPU_ZERO(&set); 84 | CPU_SET(i, &set); 85 | sched_setaffinity(0, sizeof(cpu_set_t), &set); 86 | sched_yield(); 87 | 88 | uint64_t child_pid = getpid(); 89 | etm_set_contextid_cmp(etms[i], (uint64_t)child_pid); 90 | etm_register_range(etms[i], range_u, range_l, 1); 91 | etm_enable(etms[i]); 92 | execl(app, app, app_farg, NULL); 93 | fprintf(stderr, "ERROR: execl failed.\n"); 94 | exit(0); 95 | } 96 | } 97 | 98 | /* Wait for children to exit. 
*/ 99 | int status; 100 | pid_t pid; 101 | int temp_n = n_mp; 102 | while (temp_n > 0) { 103 | pid = wait(&status); 104 | printf("Child with PID %ld exited with status 0x%x.\n", (long)pid, status); 105 | --temp_n; // TODO(pts): Remove pid from the pids array. 106 | } 107 | for(i = 0; i < n_mp; ++i) { 108 | etm_disable(etms[i]); 109 | } 110 | tmc_man_flush(tmc3); 111 | sleep(1); // wait TMC3 (aka ETR) drains the buffer 112 | 113 | dump_buffer(buf_addr, buf_size); 114 | #ifdef R5 115 | system("sed -i 's/0xDEADBEEF/0x00000000/g' ../output/trace_1.out"); 116 | #endif 117 | return 0; 118 | } 119 | 120 | -------------------------------------------------------------------------------- /paper_imp/cfg/_cfg_solver.py: -------------------------------------------------------------------------------- 1 | from tools import derive_end_point 2 | from as_cf_utils import * 3 | from colorama import Fore, Style 4 | import networkx as nx 5 | 6 | def solve_rt(self, rt_name): 7 | """ Produce a dictionary containing all info for given routine, including: 8 | plot graph, networkx graph, immediate dominator path 9 | entry point, exit points, simple cycles 10 | hubs (a hub is a logical node for a outmost largest cycle in a routine 11 | """ 12 | d_sc = self.gather_rt_stat(rt_name) 13 | g = d_sc['nx'] 14 | entry, exits = derive_end_point(g) 15 | d_sc['idom'] = nx.algorithms.immediate_dominators(g, entry) 16 | d_sc['entry'] = entry 17 | d_sc['exits'] = exits 18 | 19 | # due to blockorization some garbage filler from prev rt are padded into the current one, remove such 20 | for i,val in enumerate(entry.content): 21 | if val.rt.name_strip == rt_name: 22 | break 23 | print(f'{Fore.YELLOW} Offend Border {rt_name} {i}, {val} removed. 
{Style.RESET_ALL}') 24 | entry.content = entry.content[i:] 25 | return d_sc 26 | 27 | 28 | def gather_rt_stat(self, rt_name): 29 | rt_bbs = self.find_rt_bb(rt_name) 30 | ng = self.to_graph(rt_bbs) 31 | vg = self.nxg2pgv(ng) 32 | scg = nx.simple_cycles(ng) 33 | simple_cycles = list( list(bb for bb in sc) for sc in scg) 34 | return {'routine':rt_name, 'pgv': vg, 'nx': ng, 'sc':simple_cycles} 35 | 36 | def get_milestones(self, rt_name): 37 | rt_info = self.solve_rt(rt_name) 38 | collection = {} 39 | for exit in rt_info['exits']: 40 | collection[exit] = self.produce_chain(rt_info['idom'], rt_info['entry'], exit, rt_info['sc']) 41 | return collection 42 | def get_sub_milestones(self, d_sc, entry): 43 | collection = {} 44 | idom = nx.algorithms.immediate_dominators(d_sc['nx'], entry) 45 | keys = list(idom.keys()) 46 | true_exits = list( exit for exit in d_sc['exits'] if exit in keys) 47 | for exit in true_exits: 48 | collection[exit] = self.produce_chain(idom, entry, exit, d_sc['sc']) 49 | return collection 50 | def get_imm_milestone(self, d_sc, entry): 51 | """ This has vulnerability it's untrue that the get_imm_milestone is unique. A routine might have two returns, and thus a diverge""" 52 | collection = self.get_sub_milestones(d_sc, entry) 53 | if len(collection) > 1 : 54 | print('Two immediate milestone candidates. Only return one') 55 | for k, v in collection.items(): 56 | print(k, v) 57 | for exit,chain in collection.items(): 58 | if len(chain) == 1: 59 | print(f'Reaching the ending BB of routine. 
No front dominator anymore') 60 | return 61 | for i, bb in enumerate(chain): 62 | if bb is entry: 63 | return chain[i+1] 64 | def produce_chain(self, idom, entry, exit, cycles=None): 65 | def remove_cc(chain, cycles): 66 | for cycle in cycles: 67 | for bb in cycle: 68 | if bb in chain: 69 | chain.remove(bb) 70 | 71 | def find_idominator(idom, bb): 72 | for k,v in idom.items(): 73 | if k is bb: 74 | return v 75 | assert False, "Every bb must have one and only one immediate dominator!" 76 | def core2(idom, cur_bb): 77 | if cur_bb is entry: 78 | return [entry] 79 | else: 80 | v = find_idominator(idom, cur_bb) 81 | return [cur_bb] + core2(idom, v) 82 | 83 | solution = core2(idom, exit) 84 | solution.reverse() 85 | if cycles: 86 | remove_cc(solution, cycles) 87 | return solution 88 | -------------------------------------------------------------------------------- /paper_imp/tracer/src/platform.c: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * 3 | * Copyright (C) 2010 - 2015 Xilinx, Inc. All rights reserved. 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in 13 | * all copies or substantial portions of the Software. 14 | * 15 | * Use of the Software is limited solely to applications: 16 | * (a) running on a Xilinx device, or 17 | * (b) that interact with a Xilinx device through a bus or interconnect. 
18 | * 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 22 | * XILINX BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 23 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF 24 | * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | * SOFTWARE. 26 | * 27 | * Except as contained in this notice, the name of the Xilinx shall not be used 28 | * in advertising or otherwise to promote the sale, use or other dealings in 29 | * this Software without prior written authorization from Xilinx. 30 | * 31 | ******************************************************************************/ 32 | 33 | #include "xparameters.h" 34 | #include "xil_cache.h" 35 | 36 | #include "platform_config.h" 37 | 38 | /* 39 | * Uncomment one of the following two lines, depending on the target, 40 | * if ps7/psu init source files are added in the source directory for 41 | * compiling example outside of SDK. 
42 | */ 43 | /*#include "ps7_init.h"*/ 44 | /*#include "psu_init.h"*/ 45 | 46 | #ifdef STDOUT_IS_16550 47 | #include "xuartns550_l.h" 48 | 49 | #define UART_BAUD 9600 50 | #endif 51 | 52 | void 53 | enable_caches() 54 | { 55 | #ifdef __PPC__ 56 | Xil_ICacheEnableRegion(CACHEABLE_REGION_MASK); 57 | Xil_DCacheEnableRegion(CACHEABLE_REGION_MASK); 58 | #elif __MICROBLAZE__ 59 | #ifdef XPAR_MICROBLAZE_USE_ICACHE 60 | Xil_ICacheEnable(); 61 | #endif 62 | #ifdef XPAR_MICROBLAZE_USE_DCACHE 63 | Xil_DCacheEnable(); 64 | #endif 65 | #endif 66 | } 67 | 68 | void 69 | disable_caches() 70 | { 71 | #ifdef __MICROBLAZE__ 72 | #ifdef XPAR_MICROBLAZE_USE_DCACHE 73 | Xil_DCacheDisable(); 74 | #endif 75 | #ifdef XPAR_MICROBLAZE_USE_ICACHE 76 | Xil_ICacheDisable(); 77 | #endif 78 | #endif 79 | } 80 | 81 | void 82 | init_uart() 83 | { 84 | #ifdef STDOUT_IS_16550 85 | XUartNs550_SetBaud(STDOUT_BASEADDR, XPAR_XUARTNS550_CLOCK_HZ, UART_BAUD); 86 | XUartNs550_SetLineControlReg(STDOUT_BASEADDR, XUN_LCR_8_DATA_BITS); 87 | #endif 88 | /* Bootrom/BSP configures PS7/PSU UART to 115200 bps */ 89 | } 90 | 91 | void 92 | init_platform() 93 | { 94 | /* 95 | * If you want to run this example outside of SDK, 96 | * uncomment one of the following two lines and also #include "ps7_init.h" 97 | * or #include "psu_init.h" at the top, depending on the target. 98 | * Make sure that the ps7/psu_init.c and ps7/psu_init.h files are included 99 | * along with this example's source files for compilation. 
100 | */ 101 | /* ps7_init();*/ 102 | /* psu_init();*/ 103 | enable_caches(); 104 | init_uart(); 105 | } 106 | 107 | void 108 | cleanup_platform() 109 | { 110 | disable_caches(); 111 | } 112 | -------------------------------------------------------------------------------- /paper_imp/cfg/basic_block.py: -------------------------------------------------------------------------------- 1 | from as_cf_utils import ALL_BRANCH_INS, BL_INS 2 | 3 | class Lean_BB: 4 | 5 | def __init__(self, content): 6 | assert content[-1].ins in ALL_BRANCH_INS 7 | self.content = content 8 | # TODO purify would break code, due to padding code for routine like <_start>. So the precise entry point of rt is unprecise!!! 9 | # whenever bl, check whether the branching bb has consistent rt 10 | # self.purify() 11 | self.inst_cnt = len(self.content) 12 | self.rt = self.content[-1].rt 13 | self.e_succ_bb = None 14 | self.n_succ_bb = None 15 | self.pred_bbs = [] 16 | self.out_tunnel = None 17 | self.in_tunnel = None 18 | 19 | self.natural_succ = None 20 | self.n_oedge = None 21 | self.end_ins = self.content[-1] 22 | 23 | ######################## 24 | # ms. g. gen. 
records # 25 | ######################## 26 | 27 | self.visited = False 28 | self.is_ms = False 29 | self.ms_description = None 30 | 31 | ####################### 32 | # tracer use # 33 | ####################### 34 | self.total_hit = 0 35 | 36 | def update_natural_succ(self): 37 | if self.out_tunnel: 38 | self.natural_succ = self.out_tunnel 39 | else: 40 | self.natural_succ = self.e_succ_bb 41 | 42 | def __repr__(self) -> str: 43 | return f'BB {self.content[-1].rt.name} {hex(self.content[0].addr)} - {hex(self.content[-1].addr)} type: {self.content[-1].ins}' 44 | 45 | def update_link_e_to(self, e_bb): 46 | self.e_succ_bb = e_bb 47 | if self not in e_bb.pred_bbs: 48 | e_bb.pred_bbs.append(self) 49 | 50 | def update_link_n_to(self, n_bb): 51 | self.n_succ_bb = n_bb 52 | if self not in n_bb.pred_bbs: 53 | n_bb.pred_bbs.append(self) 54 | 55 | def update_link_ret_to(self, ret_bb): 56 | self.out_tunnel = ret_bb 57 | ret_bb.in_tunnel = self 58 | 59 | def update_n_oedge(self): 60 | if self.content[-1].ins == 'ret': 61 | self.n_oedge = 0 62 | elif self.n_succ_bb: 63 | self.n_oedge = 2 64 | else: 65 | self.n_oedge = 1 66 | 67 | def purify(self): 68 | true_rt = self.content[-1].rt 69 | head_rt = self.content[0].rt 70 | if true_rt is not head_rt: 71 | for i, asm in enumerate(self.content): 72 | if asm.rt is true_rt: 73 | break 74 | self.content = self.content[i:] 75 | 76 | 77 | ##################### 78 | # state invariance # 79 | ##################### 80 | 81 | def has_addr(self,addr): 82 | return True if addr >= self.content[0].addr and addr <= self.content[-1].addr else False 83 | 84 | def content_repr(self): 85 | repr_l = [] 86 | if self.ms_description: 87 | repr_l.append('*** Reason for MS ***') 88 | repr_l.append(self.ms_description) 89 | repr_l.append('*** Block Info ***') 90 | repr_l += list(asm.__repr__() for asm in self.content) 91 | if self.content[-1].ins == 'bl': 92 | repr_l.append(self.content[-1].to_rt_name[1:-1]) 93 | return '\n\l'.join(repr_l) 94 | 95 | def 
poke_plt_call(self): 96 | """ Comment: from a rt to a plt rt, it's not necessarily a bl call 97 | which means, assert self.content[-1].ins == bl might not pass 98 | example see iFreeHandle """ 99 | if not self.rt.is_plt: 100 | if self.e_succ_bb: 101 | if self.e_succ_bb.rt.is_plt: 102 | return self.e_succ_bb.rt 103 | return None 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | -------------------------------------------------------------------------------- /csc/main/start_cnt_pmu_event.c: -------------------------------------------------------------------------------- 1 | /* 2 | Brief: adapted from start_etr.c, this demo also illustrates 3 | how to emit Event Packet in trace stream when a user-chosen PMU event happens for a user-defined number of times. 4 | 5 | This demo should run on ZCU102/Kria board as long as the APU has linux running. 6 | 7 | Author: Weifan Chen 8 | Date: 2024-08-20 9 | */ 10 | 11 | /* 12 | Some observations: 13 | The address range can toggle the trace-on and trace-off. 14 | However, the emit of Event Packet is independent of the trace-on/trace-off state. 15 | When the event occurs, the ETM will send synchronization plus event packet. 16 | 17 | This demo the poller does NOT attempt to flush the TMC at all. 18 | It's unclear whether the real-time property is preserved. 19 | */ 20 | 21 | 22 | #define _GNU_SOURCE 23 | #include 24 | #include 25 | #include "common.h" 26 | #include "pmu_event.h" 27 | #include "cs_etm.h" 28 | #include "cs_config.h" 29 | #include "cs_soc.h" 30 | 31 | extern volatile ETM_interface *etms[4]; 32 | extern volatile TMC_interface *tmc3; 33 | 34 | int main(int argc, char *argv[]) 35 | { 36 | printf("Vanilla ZCU102 self-host trace demo.\n"); 37 | printf("Build: on %s at %s\n\n", __DATE__, __TIME__); 38 | 39 | pid_t target_pid; 40 | 41 | // Disabling all cpuidle. Access the ETM of an idled core will cause a hang. 
42 | linux_disable_cpuidle(); 43 | 44 | // Pin to the 4-th core, because we will use 1st core to run the target application. 45 | pin_to_core(3); 46 | 47 | uint64_t buf_addr = 0x00FFFC0000; //OCM 48 | uint32_t buf_size = 1024 * 256; 49 | 50 | cs_config_etr_mp(buf_addr, buf_size); 51 | 52 | // enable PMU architectural event export 53 | config_pmu_enable_export(); 54 | 55 | // prepare the trace data buffer 56 | clear_buffer(buf_addr, buf_size); 57 | 58 | // initialize ETM 59 | config_etm_n(etms[0], 0, 1); 60 | 61 | // fork a child to execute the target application. 62 | for (int i = 0; i < 1; i++) 63 | { 64 | target_pid = fork(); 65 | if (target_pid == 0) 66 | { 67 | pin_to_core(i); 68 | uint64_t child_pid = (uint64_t) getpid(); 69 | 70 | // further configure ETM. So that it will only trace the process with pid == child_pid/target_pid 71 | etm_set_contextid_cmp(etms[0], child_pid); 72 | etm_register_range(etms[0], 0x400000, 0x500000, 1); // only trace the control flow in this range 73 | 74 | // choose one example to run 75 | // example 1: use one counter (16-bit) 76 | // etm_example_single_counter_fire_event(etms[0], L2D_CACHE_REFILL_T, 65535); // 65535 is the max value for a 16-bit counter 77 | 78 | // example 2: use two counters to form a 32 bit counter 79 | etm_example_large_counter_fire_event(etms[0], L2D_CACHE_REFILL_T, 100); 80 | 81 | // example 3: test, use a large counter to see how fast it can emit event packet 82 | // etm_example_large_counter_rapid_fire_pos(etms[0], 0, 50000); 83 | 84 | // Enable ETM, start trace session 85 | etm_enable(etms[0]); 86 | 87 | // execute target application 88 | execl("./hello_ETM", "hello_ETM", NULL); 89 | perror("execl failed. 
Target application failed to start."); 90 | exit(1); 91 | } 92 | else if (target_pid < 0) 93 | { 94 | perror("fork"); 95 | return 1; 96 | } 97 | } 98 | 99 | // wait for target application to finish 100 | int status; 101 | waitpid(target_pid, &status, 0); 102 | 103 | // Disable ETM, our trace session is done. 104 | etm_disable(etms[0]); 105 | 106 | // drain the TMC3 (ETR) and write the trace data to files 107 | tmc_drain_data(tmc3); 108 | // tmc_drain_data_canonical(tmc3); 109 | 110 | return 0; 111 | } 112 | -------------------------------------------------------------------------------- /csc/README.md: -------------------------------------------------------------------------------- 1 | # Kick-starter 2 | 3 | In this (`csc`) directory, simply run `make`. You need a cross-compiler, or you can compile on the target (probably). Run any `start_*` binary to start a demo trace session. To parse the generated trace data, you first need `deformat`, and then the `ETM_data_parser`. 4 | 5 | ## Technical Details 6 | 7 | CoreSight Trace is non-trivial, especially if one desires advanced features. After years of digging, we present insights/thinkModel/useGuide here. The primary focus is on the Embedded Trace Macrocell (ETM). 8 | 9 | ### Resource 10 | 11 | Advanced features in ETM usually involve Trace Unit Resources. ETM supports up to 32 such resources. As for Cortex-A53, it implements 16 resources. It is useful to think of each resource as a **single-input multi-output** entity. When the entity receives an input, it can assert all its outputs. What can be an input signal? A range of trace unit events, including but not limited to: 12 | 13 | - the program counter hits a user-defined virtual address. 14 | - the PID changes to a user-defined value. 15 | - an external input occurs. 16 | - the counter on ETM reaches zero. 17 | 18 | You can use the **Resource Selection Control Register** to control which input the entity listens to. Now let us answer who can use the output from the entity. 
When you look up the register descriptions in the [ETM specification](https://developer.arm.com/documentation/ihi0064/latest/), whenever you see an 8-bit field called **event selector** in a register, it indicates that the register's underlying component can listen to an entity's output. For example, the **counter control register** contains the 8-bit event selector field. You can then write to the field, so that the underlying counter will listen to the entity the field indicates. The manual will tell you how the counter reacts to the entity output: in this case, the counter value decrements by one. 19 | 20 | Let's push the example further. Assuming the above-mentioned entity is _R2_, we now program another entity _R3_ by writing to its resource selection control register. We let **the counter reaching zero** be the input of _R3_. Thus the net result is that _R3_ will fire after a defined number of inputs have been received by _R2_. This can be used to express semantics such as "R3 fires on every tenth hit of the virtual address 0x400000 by the program counter". 21 | 22 | ### External Input Event 23 | 24 | The Performance Monitor Unit (PMU) often offers valuable statistics regarding the processes by monitoring the architectural events. When an ETM is present, the PMU can also signal the ETM when a certain architectural event occurs. Notice that architectural events monitored by the PMU (such as instruction retired, L2 data cache refill, etc.) are not ETM events. How to let the ETM listen to the architectural events? You are right! The resource can be programmed to choose a specific architectural event to be its input, by programming the resource selection control register to the External Input group. More precisely, an additional register, the **external input selector**, is also involved. Check the manual. 25 | 26 | ### ETM asserts external output 27 | 28 | When a resource asserts its output, the ETM can also assert its own **External Output Pins**. What hardware do the ETM output pins point to? This is usually hard-wired by the vendor. 
Check the manual. But it is probably some Cross-Trigger Interface (CTI), for better flexibility. How to let the ETM deliver the output? You guessed correctly! There is an 8-bit event selector field for it. Specifically, the **Event Control Register 0** provides four event selector fields, each of which can be programmed to listen to a resource. By doing so, when the resource fires, the ETM will also assert its corresponding external output pin. 29 | 30 | Additionally, **Event Control Register 1** also allows the firing of the resource to be reported in the trace stream in the form of event packets. 31 | 32 | ## Conclusion 33 | 34 | With the examples and write-up presented here, I believe you can navigate the ETM manual with relative ease. If you feel this repo is helpful, please Star it and [cite our paper](https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.ECRTS.2023.13)! Thanks! 笔芯 35 | -------------------------------------------------------------------------------- /paper_imp/cfg/slack.py: -------------------------------------------------------------------------------- 1 | """ 2 | For final disparity run 3 | python3 slack.py ../rpu_output/disparity_bw.out disparity 4 | 5 | For final sift run 6 | python3 slack.py ../rpu_output/sift.final.nominal sift 7 | 8 | For final tracking run 9 | python3 slack.py ../rpu_output/tracking.slack.out tracking 10 | 11 | For final mser run 12 | python3 slack.py ../rpu_output/mser.slack.out mser 13 | """ 14 | 15 | 16 | 17 | import sys 18 | import matplotlib.pyplot as plt 19 | 20 | def preprocess(fname): 21 | with open(fname, 'r') as f: 22 | raw = f.read().splitlines() 23 | 24 | logs = [] 25 | cur_log = [] 26 | on = False 27 | for l in raw: 28 | if l == 'LOG BEGIN': 29 | on = True 30 | continue 31 | 32 | if l == 'LOG END': 33 | on = False 34 | logs.append(cur_log) 35 | cur_log = [] 36 | 37 | if on: 38 | cur_log.append(l) 39 | 40 | return logs 41 | 42 | def preprocess_log(log): 43 | for pivot,val in enumerate(log): 44 | if val[0] == '#': 45 | break 46 | 47 | get_t = lambda x: 
int(x.split(',')[2]) 48 | get_rt = lambda x: int(x.split(',')[3])/1000 49 | get_nt = lambda x: int(x.split(',')[4])/1000 50 | get_tt = lambda x: int(x.split(',')[5])/1000 51 | get_addr = lambda x: int(x.split(',')[1], 16) 52 | 53 | real_times = [] 54 | nominal_times = [] 55 | tail_times = [] 56 | for i in range(pivot): 57 | real_times.append(get_rt(log[i])) 58 | nominal_times.append(get_nt(log[i])) 59 | tail_times.append(get_tt(log[i])) 60 | 61 | return real_times, nominal_times, tail_times 62 | 63 | def plot_vshog(real_times, real_times_hog, nominal_times): 64 | xs = list(range(len(real_times))) 65 | plt.plot(xs, real_times, color='blue', label='real_time_normal') 66 | plt.plot(xs, real_times_hog, color='red', label='real_time_hog') 67 | plt.plot(xs, nominal_times, color='green', label='nominal') 68 | plt.xlabel('milestone hit') 69 | plt.ylabel('elapse (ms)') 70 | plt.title('Disparity Milestone Reached Diagram') 71 | plt.legend() 72 | plt.show() 73 | 74 | 75 | def plot_vstail(real_times, tail_times, nominal_times): 76 | xs = list(range(len(real_times))) 77 | plt.plot(xs, real_times, color='#8fbc8f', label='real_time') 78 | plt.plot(xs, tail_times, color='#d73a22', label='tail') 79 | plt.plot(xs, nominal_times, color='#e37564', label='nominal') 80 | plt.xlabel('milestone hit') 81 | plt.ylabel('elapse (ms)') 82 | plt.title('Mser Milestone Reached Diagram') 83 | plt.legend() 84 | plt.show() 85 | 86 | if __name__=='__main__': 87 | logs = preprocess(sys.argv[1]) 88 | bm_name = sys.argv[2] 89 | print(len(logs)) 90 | 91 | if bm_name == 'tracking': 92 | ctrl_rt, nt, tail_t = preprocess_log(logs[-2]) 93 | uctrl_rt, nt, _ = preprocess_log(logs[-1]) 94 | else: 95 | ctrl_rt, nt, tail_t = preprocess_log(logs[-1]) 96 | uctrl_rt, nt, _ = preprocess_log(logs[-2]) 97 | #normal_rt, nt, _ = preprocess_log(logs[-3]) 98 | 99 | setpt_u = list(map(lambda x: 50 + 1.3 * x, nt)) 100 | setpt_l = list(map(lambda x: -50 + 1.3 * x, nt)) 101 | setpt = list(map(lambda x: 1.3 * x, nt)) 102 | 103 
| xs = list(range(len(ctrl_rt))) 104 | plt.plot(xs, nt, color = 'green', label='perfect run (nominal)') 105 | plt.plot(xs, setpt, color = '#707070', label='set-point') 106 | plt.plot(xs, setpt_u, color = '#c0c0c0', label='set-point upper bound', linestyle='dashed') 107 | plt.plot(xs, setpt_l, color = '#c0c0c0', label='set-point lower bound', linestyle='dashed') 108 | plt.plot(xs, ctrl_rt, color = '#0000ff', label='interferance controlled') 109 | plt.plot(xs, uctrl_rt, color = 'red', label='interferance uncontrolled') 110 | #plt.plot(xs, normal_rt, color = 'orange', label='normal') 111 | plt.legend() 112 | plt.xlabel('# milestone') 113 | plt.ylabel('execution time (ms)') 114 | plt.title(f'{bm_name} with Bandwidth Monitored by RPU') 115 | plt.savefig(f'{bm_name}_demo.png') 116 | 117 | 118 | 119 | 120 | 121 | 122 | -------------------------------------------------------------------------------- /paper_imp/cfg/msg_binarize.py: -------------------------------------------------------------------------------- 1 | from tools import derive_end_point 2 | import struct 3 | import networkx as nx 4 | import pygraphviz as pgv 5 | 6 | def binarize(msg, routine_name=None): 7 | ms_entry, _ = derive_end_point(msg) 8 | br = bytearray() 9 | nx.set_node_attributes(msg, False, 'mapped') 10 | g_offset = 0 11 | 12 | def core(ms, back_link): 13 | nonlocal g_offset 14 | if msg.nodes[ms]['mapped']: 15 | assert back_link is not None, "Only entry has no back_link" 16 | struct.pack_into(' // only for exit(EXIT_FAILURE), should not be here 3 | 4 | static basicblock_t * ctl_ptr = 0; 5 | static uint16_t ctl_buff_size; 6 | static uint16_t curr_block_index; 7 | 8 | static uint8_t ctl_state = CTL_STATE_INIT; 9 | 10 | static uint32_t address_stack[ADDRESS_STACK_SIZE]; 11 | static uint16_t address_stack_ptr; 12 | 13 | void report(const char* format, ... 
) { 14 | va_list args; 15 | va_start(args, format); 16 | vfprintf(stdout, format, args); 17 | va_end(args); 18 | fprintf(stdout, "\n"); 19 | } 20 | 21 | static void ctl_panic(void) { 22 | printf("CTL PANIC, ABORTING\n"); 23 | fprintf(stderr, "CTL PANIC\n"); 24 | exit(EXIT_FAILURE); 25 | } 26 | 27 | // TODO: Should be binary search 28 | static uint16_t find_block(uint32_t address) { 29 | uint16_t i; 30 | 31 | for (i = 1; i < ctl_buff_size - 1; ++i) { 32 | if ((address >= ctl_ptr[i].start_addr) && (address < ctl_ptr[i + 1].start_addr)) { 33 | return i; 34 | } 35 | } 36 | 37 | return 0; 38 | } 39 | 40 | static int address_stack_push(uint32_t address) { 41 | if (address_stack_ptr == ADDRESS_STACK_SIZE - 1) 42 | return -1; 43 | 44 | address_stack[++address_stack_ptr] = address; 45 | return 0; 46 | } 47 | 48 | static uint32_t address_stack_pop(void) { 49 | if (address_stack_ptr == 0) 50 | return 0; 51 | 52 | return address_stack[address_stack_ptr--]; 53 | } 54 | 55 | static inline void address_stack_clear(void) { 56 | address_stack_ptr = 0; 57 | } 58 | 59 | void set_ctl_buff(void* ptr, uint16_t size) { 60 | ctl_ptr = (basicblock_t *) ptr; 61 | ctl_buff_size = size; 62 | curr_block_index = 0; 63 | address_stack_ptr = 0; 64 | } 65 | 66 | void report_addres(uint64_t address64, uint8_t is) { 67 | uint32_t address32 = (uint32_t) (address64 & 0xffffffff); 68 | uint32_t popped_address; 69 | 70 | if (ctl_ptr == 0) 71 | return; 72 | 73 | if ((address64 != (address64 & 0xffffffff)) || !(address32 >= ctl_ptr[1].start_addr && address32 < ctl_ptr[ctl_buff_size - 1].start_addr)) { 74 | if (curr_block_index != 0) { 75 | printf("Leaving scope to 0x%lx\n\n", address64); 76 | curr_block_index = 0; 77 | //ctl_state = (ctl_state == CTL_STATE_INIT) ? CTL_STATE_INIT : CTL_STATE_OUTSCOPE; 78 | 79 | // lib/kernel is able to clear the call stack. 
So when entering the OUTSCOPE, clear the stack and set to INIT 80 | address_stack_clear(); 81 | ctl_state = CTL_STATE_INIT; 82 | } 83 | 84 | return; 85 | } 86 | 87 | printf("Entering scope block at 0x%x\n", address32); 88 | 89 | if (ctl_state == CTL_STATE_POP_COMP) { 90 | popped_address = address_stack_pop(); 91 | if (popped_address != address32) { 92 | printf("Return; Popped address (0x%x) and reported address (0x%x) do not match, halting\n", popped_address, address32); 93 | ctl_panic(); 94 | } else { 95 | printf("Return; Pop and compare: ok\n"); 96 | } 97 | } 98 | 99 | curr_block_index = find_block(address32); 100 | 101 | if (curr_block_index == 0) { 102 | printf("Block not found, halting\n\n"); 103 | ctl_panic(); 104 | } 105 | 106 | ctl_state = (ctl_state == CTL_STATE_PUSH) ? CTL_STATE_INSCOPE : CTL_STATE_INIT ; 107 | 108 | printf("Block index: %d\n\n", curr_block_index); 109 | } 110 | 111 | void report_atom(uint8_t atom) { 112 | if (ctl_ptr == 0) 113 | return; 114 | 115 | if (curr_block_index == 0) 116 | return; 117 | 118 | printf("Atom: %c\n", atom ? 'E' : 'N'); 119 | printf("Current block %d (0x%x): r: %d, l: %d, s: %d, c: %d, offset: 0x%x\n", curr_block_index, ctl_ptr[curr_block_index].start_addr, ctl_ptr[curr_block_index].r, ctl_ptr[curr_block_index].l, 120 | ctl_ptr[curr_block_index].s, ctl_ptr[curr_block_index].c, ctl_ptr[curr_block_index].offset); 121 | if (atom == 0) { 122 | if (ctl_ptr[curr_block_index].c == 0) { 123 | printf("C bit is 0, but Atom is N, halting\n"); 124 | ctl_panic(); 125 | } 126 | 127 | curr_block_index = curr_block_index + 1; 128 | printf("New block index: %d\n\n", curr_block_index); 129 | } else { 130 | if (ctl_ptr[curr_block_index].r) { 131 | //ctl_state = CTL_STATE_POP_COMP; 132 | ctl_state = (ctl_state == CTL_STATE_INSCOPE || ctl_state == CTL_STATE_PUSH) ? 
CTL_STATE_POP_COMP : CTL_STATE_INIT ; 133 | } else { 134 | if (ctl_ptr[curr_block_index].l) { 135 | printf("Pushing 0x%x\n", ctl_ptr[curr_block_index + 1].start_addr); 136 | ctl_state = CTL_STATE_PUSH; 137 | if (address_stack_push(ctl_ptr[curr_block_index + 1].start_addr) < 0) { 138 | printf("Address stack overflow, halting\n"); 139 | ctl_panic(); 140 | } 141 | } 142 | 143 | if (ctl_ptr[curr_block_index].s) { 144 | curr_block_index = ctl_ptr[curr_block_index].offset / 8 + 1; 145 | printf("Entering block at %d, address: 0x%x\n\n", curr_block_index, ctl_ptr[curr_block_index].start_addr); 146 | } 147 | } 148 | } 149 | } -------------------------------------------------------------------------------- /ETM_data_parser/src/handlers.c: -------------------------------------------------------------------------------- 1 | #include "handlers.h" 2 | #include // only for exit(EXIT_FAILURE), should not be here 3 | 4 | static basicblock_t * ctl_ptr = 0; 5 | static uint16_t ctl_buff_size; 6 | static uint16_t curr_block_index; 7 | 8 | static uint8_t ctl_state = CTL_STATE_INIT; 9 | 10 | static uint32_t address_stack[ADDRESS_STACK_SIZE]; 11 | static uint16_t address_stack_ptr; 12 | 13 | void report(const char* format, ... 
) { 14 | va_list args; 15 | va_start(args, format); 16 | vfprintf(stdout, format, args); 17 | va_end(args); 18 | fprintf(stdout, "\n"); 19 | fflush(stdout); 20 | } 21 | 22 | static void ctl_panic(void) { 23 | printf("CTL PANIC, ABORTING\n"); 24 | fprintf(stderr, "CTL PANIC\n"); 25 | exit(EXIT_FAILURE); 26 | } 27 | 28 | // TODO: Should be binary search 29 | static uint16_t find_block(uint32_t address) { 30 | uint16_t i; 31 | 32 | for (i = 1; i < ctl_buff_size - 1; ++i) { 33 | if ((address >= ctl_ptr[i].start_addr) && (address < ctl_ptr[i + 1].start_addr)) { 34 | return i; 35 | } 36 | } 37 | 38 | return 0; 39 | } 40 | 41 | static int address_stack_push(uint32_t address) { 42 | if (address_stack_ptr == ADDRESS_STACK_SIZE - 1) 43 | return -1; 44 | 45 | address_stack[++address_stack_ptr] = address; 46 | return 0; 47 | } 48 | 49 | static uint32_t address_stack_pop(void) { 50 | if (address_stack_ptr == 0) 51 | return 0; 52 | 53 | return address_stack[address_stack_ptr--]; 54 | } 55 | 56 | static inline void address_stack_clear(void) { 57 | address_stack_ptr = 0; 58 | } 59 | 60 | void set_ctl_buff(void* ptr, uint16_t size) { 61 | ctl_ptr = (basicblock_t *) ptr; 62 | ctl_buff_size = size; 63 | curr_block_index = 0; 64 | address_stack_ptr = 0; 65 | } 66 | 67 | void report_addres(uint64_t address64, uint8_t is) { 68 | uint32_t address32 = (uint32_t) (address64 & 0xffffffff); 69 | uint32_t popped_address; 70 | 71 | if (ctl_ptr == 0) 72 | return; 73 | 74 | if ((address64 != (address64 & 0xffffffff)) || !(address32 >= ctl_ptr[1].start_addr && address32 < ctl_ptr[ctl_buff_size - 1].start_addr)) { 75 | if (curr_block_index != 0) { 76 | printf("Leaving scope to 0x%lx\n\n", address64); 77 | curr_block_index = 0; 78 | //ctl_state = (ctl_state == CTL_STATE_INIT) ? CTL_STATE_INIT : CTL_STATE_OUTSCOPE; 79 | 80 | // lib/kernel is able to clear the call stack. 
So when entering the OUTSCOPE, clear the stack and set to INIT 81 | address_stack_clear(); 82 | ctl_state = CTL_STATE_INIT; 83 | } 84 | 85 | return; 86 | } 87 | 88 | printf("Entering scope block at 0x%x\n", address32); 89 | 90 | if (ctl_state == CTL_STATE_POP_COMP) { 91 | popped_address = address_stack_pop(); 92 | if (popped_address != address32) { 93 | printf("Return; Popped address (0x%x) and reported address (0x%x) do not match, halting\n", popped_address, address32); 94 | ctl_panic(); 95 | } else { 96 | printf("Return; Pop and compare: ok\n"); 97 | } 98 | } 99 | 100 | curr_block_index = find_block(address32); 101 | 102 | if (curr_block_index == 0) { 103 | printf("Block not found, halting\n\n"); 104 | ctl_panic(); 105 | } 106 | 107 | ctl_state = (ctl_state == CTL_STATE_PUSH) ? CTL_STATE_INSCOPE : CTL_STATE_INIT ; 108 | 109 | printf("Block index: %d\n\n", curr_block_index); 110 | } 111 | 112 | void report_atom(uint8_t atom) { 113 | if (ctl_ptr == 0) 114 | return; 115 | 116 | if (curr_block_index == 0) 117 | return; 118 | 119 | printf("Atom: %c\n", atom ? 'E' : 'N'); 120 | printf("Current block %d (0x%x): r: %d, l: %d, s: %d, c: %d, offset: 0x%x\n", curr_block_index, ctl_ptr[curr_block_index].start_addr, ctl_ptr[curr_block_index].r, ctl_ptr[curr_block_index].l, 121 | ctl_ptr[curr_block_index].s, ctl_ptr[curr_block_index].c, ctl_ptr[curr_block_index].offset); 122 | if (atom == 0) { 123 | if (ctl_ptr[curr_block_index].c == 0) { 124 | printf("C bit is 0, but Atom is N, halting\n"); 125 | ctl_panic(); 126 | } 127 | 128 | curr_block_index = curr_block_index + 1; 129 | printf("New block index: %d\n\n", curr_block_index); 130 | } else { 131 | if (ctl_ptr[curr_block_index].r) { 132 | //ctl_state = CTL_STATE_POP_COMP; 133 | ctl_state = (ctl_state == CTL_STATE_INSCOPE || ctl_state == CTL_STATE_PUSH) ? 
CTL_STATE_POP_COMP : CTL_STATE_INIT ; 134 | } else { 135 | if (ctl_ptr[curr_block_index].l) { 136 | printf("Pushing 0x%x\n", ctl_ptr[curr_block_index + 1].start_addr); 137 | ctl_state = CTL_STATE_PUSH; 138 | if (address_stack_push(ctl_ptr[curr_block_index + 1].start_addr) < 0) { 139 | printf("Address stack overflow, halting\n"); 140 | ctl_panic(); 141 | } 142 | } 143 | 144 | if (ctl_ptr[curr_block_index].s) { 145 | curr_block_index = ctl_ptr[curr_block_index].offset / 8 + 1; 146 | printf("Entering block at %d, address: 0x%x\n\n", curr_block_index, ctl_ptr[curr_block_index].start_addr); 147 | } 148 | } 149 | } 150 | } -------------------------------------------------------------------------------- /paper_imp/tracer/src/parser.c: -------------------------------------------------------------------------------- 1 | //#include 2 | #include "platform.h" 3 | #include "xil_printf.h" 4 | #include "xtime_l.h" 5 | #include "etm.h" 6 | #include "xil_cache.h" 7 | 8 | volatile uint32_t etr_buffer_unused[ETR_BUFFER_SIZE] __attribute__((section(".trc_buf_zone"))); 9 | volatile uint32_t * etr_buffer = &etr_buffer_unused[0]; //(uint32_t *) 0xB0000000; 10 | volatile uint8_t running = 0; 11 | volatile uint32_t bandwidth_control = 1; 12 | volatile float alpha = 1.3; 13 | volatile float beta = 1; 14 | volatile uint32_t t_end = 0; 15 | uint32_t margin = 0; 16 | 17 | extern void trace_loop(void); 18 | extern milestone_relay relay; 19 | extern uint32_t milestone_graph[MSG_BUFFER_SIZE]; 20 | 21 | uint32_t buffer_pointer; // pointer to individual bytes in etr_buffer 22 | uint32_t cur_word_index = 0 ; 23 | uint32_t rounds = 0; 24 | static uint32_t last_word = 0xdeadbeef; 25 | static uint32_t last_word_index = ETR_BUFFER_SIZE + 1; 26 | 27 | void parser_reset() { 28 | print("Parser register reset.\n\r"); 29 | etr_buffer = &etr_buffer_unused[0]; 30 | running = 0; 31 | bandwidth_control = 1; 32 | alpha = 1.3; 33 | beta = 1; 34 | t_end = 0; 35 | margin = 0; 36 | cur_word_index = 0 ; 37 | rounds = 
0; 38 | last_word = 0xdeadbeef; 39 | last_word_index = ETR_BUFFER_SIZE + 1; 40 | } 41 | 42 | 43 | void cache_graph(volatile uint32_t *src, uint32_t size, uint32_t *dst) { 44 | int i=0; 45 | for(i=0; i< size; i++) { 46 | dst[i] = src[i]; 47 | } 48 | } 49 | 50 | void init_relay(milestone_relay *relay) { 51 | relay -> n_valid = 1; 52 | relay -> address[0] = milestone_graph[0]; 53 | relay -> offset[0] = 0; 54 | relay -> nominal_t[0] = 0; 55 | relay -> tail_t[0] = 0; 56 | } 57 | 58 | 59 | void check_stop_condition(void) { 60 | if (running == 0) { 61 | xil_printf("Running Stopped\n\r"); 62 | xil_printf("Buffer used: %d/%d\n\r", rounds * ETR_BUFFER_SIZE + buffer_pointer, ETR_BUFFER_SIZE); 63 | Xil_DCacheFlush(); // if DCache not flushed, buffer dump would not work correctly. However not guarantee to work 64 | 65 | while(running == 0); 66 | if(running==2) { 67 | ; 68 | } 69 | } 70 | } 71 | 72 | 73 | //uint8_t data_available() { 74 | // return running; 75 | //} 76 | 77 | void start() { 78 | // Set deadbeef to buffers. deadbeef is marker for unavailable data 79 | unsigned int i; 80 | for (i = 0; i < ETR_BUFFER_SIZE; ++i) { 81 | etr_buffer[i] = 0xdeadbeef; 82 | } 83 | for (i = 0; i < MSG_BUFFER_SIZE; ++i) { 84 | milestones[i] = 0xffffffff; 85 | } 86 | Xil_DCacheFlush(); 87 | 88 | xil_printf("\n\r"); 89 | xil_printf("TPAw0v Tracer. T-Graph Circular Buffer. 
Stack at 0x%x\n\r", &i); 90 | xil_printf("Global Tightly Couple Memory addr offset: 0xffe00000\n\r"); 91 | xil_printf("Tracer ctl: %x, sizeof(Xtime)=%d\n\r", (uint32_t) &running, sizeof(XTime)); 92 | xil_printf("Corunner ctl: %x\n\r", (uint32_t) &bandwidth_control); 93 | xil_printf("alpha ctl: %x\n\r", (uint32_t) &alpha); 94 | xil_printf("beta ctl: %x\n\r", (uint32_t) &beta); 95 | xil_printf("T_nom ctl: %x\n\r", (uint32_t) &t_end); 96 | print("Set alpha ,beta, t_end before run application!\n\r"); 97 | print("Can be set from host by devmem\n\r"); 98 | usleep(20000000); 99 | xil_printf("Waiting for milestones to be set at 0x%x\n\r", &milestones[0]); 100 | 101 | while(milestones[0] != 0xdeadbeef) { 102 | Xil_DCacheInvalidateRange(milestones, sizeof(uint32_t) * MSG_BUFFER_SIZE); 103 | } 104 | margin = (uint32_t) ((float) t_end * (1-beta)); 105 | xil_printf("Read 0xdeadbeef, wait for hoster driver configuration\n\r"); 106 | xil_printf("margin: %d\n\r", margin); 107 | if (margin == 0) { 108 | xil_printf("margin is zero\n\r"); 109 | } else { 110 | xil_printf("margin is non-zero\n\r"); 111 | } 112 | 113 | usleep(1000000); 114 | 115 | milestones_size = milestones[1]; 116 | milestones = &milestones[2]; 117 | 118 | cache_graph(milestones, milestones_size, milestone_graph); 119 | set_addr_cmp(milestone_graph[0], 0); // before ETM starts, set the first address in trace range 120 | init_relay(&relay); 121 | 122 | if (milestones_size >= MSG_BUFFER_SIZE) { 123 | xil_printf("milestones_size should be < %d\n\r", MSG_BUFFER_SIZE); 124 | } 125 | 126 | print("hand off to trace loop.\n\r"); 127 | print("\n\r"); 128 | 129 | buffer_pointer = 0; 130 | rounds = 0; 131 | last_word = 0xdeadbeef; 132 | last_word_index = ETR_BUFFER_SIZE + 1; 133 | trace_loop(); 134 | check_stop_condition(); 135 | } 136 | 137 | 138 | int main() { 139 | init_platform(); 140 | 141 | while(1) { 142 | running = 1; 143 | start(); 144 | while(running != 2); 145 | print("RPU reset request.\n\r"); 146 | trace_reset(); 
147 | etm_reset(); 148 | parser_reset(); 149 | } 150 | 151 | cleanup_platform(); 152 | } 153 | 154 | 155 | -------------------------------------------------------------------------------- /paper_imp/demo/milestone_graphs/mg/sift.gaussianss.tmsg.dot: -------------------------------------------------------------------------------- 1 | digraph "" { 2 | node [label="\N", 3 | shape=record, 4 | style=filled 5 | ]; 6 | "BB 0x4057d0 - 0x405834 type: bl" [label="*** Reason for MS *** 7 | \lEntry 8 | \l*** Block Info *** 9 | \lgaussianss 4057d0 stp 10 | \lgaussianss 4057d4 scvtf 11 | \lgaussianss 4057d8 mov 12 | \lgaussianss \ 13 | 4057dc stp 14 | \lgaussianss 4057e0 fmov 15 | \lgaussianss 4057e4 fmov 16 | \lgaussianss 4057e8 stp 17 | \lgaussianss 4057ec fmov 18 | \lgaussianss \ 19 | 4057f0 fmov 20 | \lgaussianss 4057f4 fdiv 21 | \lgaussianss 4057f8 stp 22 | \lgaussianss 4057fc stp 23 | \lgaussianss 405800 mov 24 | \lgaussianss \ 25 | 405804 stp 26 | \lgaussianss 405808 mov 27 | \lgaussianss 40580c stp 28 | \lgaussianss 405810 mov 29 | \lgaussianss 405814 stp 30 | \lgaussianss \ 31 | 405818 sub 32 | \lgaussianss 40581c stp 33 | \lgaussianss 405820 mov 34 | \lgaussianss 405824 mov 35 | \lgaussianss 405828 stp 36 | \lgaussianss \ 37 | 40582c mov 38 | \lgaussianss 405830 add 39 | \lgaussianss 405834 bl E:400a70 40 | \lpow@plt"]; 41 | "BB 0x40587c - 0x405888 type: bl" [label="*** Reason for MS *** 42 | \l2301619 has exceeded thresh 10000 43 | \l*** Block Info *** 44 | \lgaussianss 40587c str 45 | \lgaussianss 405880 mul 46 | \lgaussianss \ 47 | 405884 sbfiz 48 | \lgaussianss 405888 bl E:4009e0 49 | \lmalloc@plt"]; 50 | "BB 0x4057d0 - 0x405834 type: bl" -> "BB 0x40587c - 0x405888 type: bl" [label="{}"]; 51 | "BB 0x4058e4 - 0x4058f4 type: bl" [label="*** Reason for MS *** 52 | \l1336285 has exceeded thresh 10000 53 | \l*** Block Info *** 54 | \lgaussianss 4058e4 fcvt 55 | \lgaussianss 4058e8 mov 56 | \lgaussianss \ 57 | 4058ec str 58 | \lgaussianss 4058f0 mov 59 | \lgaussianss 
4058f4 bl E:405380 60 | \limsmooth"]; 61 | "BB 0x40587c - 0x405888 type: bl" -> "BB 0x4058e4 - 0x4058f4 type: bl" [label="{}"]; 62 | "BB 0x4058f8 - 0x4058fc type: b.le" [label="*** Reason for MS *** 63 | \l48487813 has exceeded thresh 10000 64 | \l*** Block Info *** 65 | \lgaussianss 4058f8 cmp 66 | \lgaussianss 4058fc b.le \ 67 | E:40595c "]; 68 | "BB 0x4058e4 - 0x4058f4 type: bl" -> "BB 0x4058f8 - 0x4058fc type: b.le" [label="{}"]; 69 | "BB 0x40593c - 0x405950 type: bl" [label="*** Reason for MS *** 70 | \l923583 has exceeded thresh 10000 71 | \l*** Block Info *** 72 | \lgaussianss 40593c fmul 73 | \lgaussianss 405940 mov 74 | \lgaussianss \ 75 | 405944 str 76 | \lgaussianss 405948 ldr 77 | \lgaussianss 40594c fcvt 78 | \lgaussianss 405950 bl E:405380 79 | \limsmooth"]; 80 | "BB 0x4058f8 - 0x4058fc type: b.le" -> "BB 0x40593c - 0x405950 type: bl" [label="{}"]; 81 | "BB 0x405954 - 0x405958 type: b.ne" [label="*** Reason for MS *** 82 | \l48297011 has exceeded thresh 10000 83 | \l*** Block Info *** 84 | \lgaussianss 405954 cmp 85 | \lgaussianss 405958 b.ne \ 86 | E:405918 "]; 87 | "BB 0x40593c - 0x405950 type: bl" -> "BB 0x405954 - 0x405958 type: b.ne" [label="{}"]; 88 | "BB 0x405954 - 0x405958 type: b.ne" -> "BB 0x40593c - 0x405950 type: bl" [label="{}"]; 89 | "BB 0x405adc - 0x405aec type: b.le" [label="*** Reason for MS *** 90 | \l11290 has exceeded thresh 10000 91 | \l*** Block Info *** 92 | \lgaussianss 405adc ldp 93 | \lgaussianss 405ae0 str 94 | \lgaussianss \ 95 | 405ae4 mul 96 | \lgaussianss 405ae8 cmp 97 | \lgaussianss 405aec b.le E:405a20 "]; 98 | "BB 0x405954 - 0x405958 type: b.ne" -> "BB 0x405adc - 0x405aec type: b.le" [label="{}"]; 99 | "BB 0x405a84 - 0x405a88 type: b.ne" [label="*** Reason for MS *** 100 | \l184594 has exceeded thresh 10000 101 | \l*** Block Info *** 102 | \lgaussianss 405a84 cmp 103 | \lgaussianss 405a88 b.ne \ 104 | E:405a48 "]; 105 | "BB 0x405adc - 0x405aec type: b.le" -> "BB 0x405a84 - 0x405a88 type: b.ne" [label="{}"]; 106 | "BB 
0x405a84 - 0x405a88 type: b.ne" -> "BB 0x405adc - 0x405aec type: b.le" [label="{}"]; 107 | "BB 0x405a84 - 0x405a88 type: b.ne" -> "BB 0x405a84 - 0x405a88 type: b.ne" [label="{}"]; 108 | "BB 0x405aa8 - 0x405ad4 type: ret" [label="*** Reason for MS *** 109 | \l190912 has exceeded thresh 10000 110 | \l*** Block Info *** 111 | \lgaussianss 405aa8 mov 112 | \lgaussianss 405aac ldp 113 | \lgaussianss \ 114 | 405ab0 ldp 115 | \lgaussianss 405ab4 ldp 116 | \lgaussianss 405ab8 ldp 117 | \lgaussianss 405abc ldp 118 | \lgaussianss 405ac0 ldp 119 | \lgaussianss \ 120 | 405ac4 ldp 121 | \lgaussianss 405ac8 ldp 122 | \lgaussianss 405acc ldp 123 | \lgaussianss 405ad0 ldp 124 | \lgaussianss 405ad4 ret "]; 125 | "BB 0x405a84 - 0x405a88 type: b.ne" -> "BB 0x405aa8 - 0x405ad4 type: ret" [label="{}"]; 126 | } 127 | -------------------------------------------------------------------------------- /paper_imp/demo/milestone_graphs/mg/sift.tmsg.dot: -------------------------------------------------------------------------------- 1 | digraph "" { 2 | node [label="\N", 3 | shape=record, 4 | style=filled 5 | ]; 6 | "BB 0x4057cc - 0x405834 type: bl" [label="*** Reason for MS *** 7 | \lEntry 8 | \l*** Block Info *** 9 | \lresizeArray 4057cc nop 10 | \lgaussianss 4057d0 stp 11 | \lgaussianss 4057d4 scvtf 12 | \lgaussianss \ 13 | 4057d8 mov 14 | \lgaussianss 4057dc stp 15 | \lgaussianss 4057e0 fmov 16 | \lgaussianss 4057e4 fmov 17 | \lgaussianss 4057e8 stp 18 | \lgaussianss \ 19 | 4057ec fmov 20 | \lgaussianss 4057f0 fmov 21 | \lgaussianss 4057f4 fdiv 22 | \lgaussianss 4057f8 stp 23 | \lgaussianss 4057fc stp 24 | \lgaussianss \ 25 | 405800 mov 26 | \lgaussianss 405804 stp 27 | \lgaussianss 405808 mov 28 | \lgaussianss 40580c stp 29 | \lgaussianss 405810 mov 30 | \lgaussianss \ 31 | 405814 stp 32 | \lgaussianss 405818 sub 33 | \lgaussianss 40581c stp 34 | \lgaussianss 405820 mov 35 | \lgaussianss 405824 mov 36 | \lgaussianss \ 37 | 405828 stp 38 | \lgaussianss 40582c mov 39 | \lgaussianss 405830 
add 40 | \lgaussianss 405834 bl E:400a70 41 | \lpow@plt"]; 42 | "BB 0x40587c - 0x405888 type: bl" [label="*** Reason for MS *** 43 | \l2303642 has exceeded thresh 10000 44 | \l*** Block Info *** 45 | \lgaussianss 40587c str 46 | \lgaussianss 405880 mul 47 | \lgaussianss \ 48 | 405884 sbfiz 49 | \lgaussianss 405888 bl E:4009e0 50 | \lmalloc@plt"]; 51 | "BB 0x4057cc - 0x405834 type: bl" -> "BB 0x40587c - 0x405888 type: bl" [label="{}"]; 52 | "BB 0x4058e4 - 0x4058f4 type: bl" [label="*** Reason for MS *** 53 | \l1334579 has exceeded thresh 10000 54 | \l*** Block Info *** 55 | \lgaussianss 4058e4 fcvt 56 | \lgaussianss 4058e8 mov 57 | \lgaussianss \ 58 | 4058ec str 59 | \lgaussianss 4058f0 mov 60 | \lgaussianss 4058f4 bl E:405380 61 | \limsmooth"]; 62 | "BB 0x40587c - 0x405888 type: bl" -> "BB 0x4058e4 - 0x4058f4 type: bl" [label="{}"]; 63 | "BB 0x4058f8 - 0x4058fc type: b.le" [label="*** Reason for MS *** 64 | \l48496855 has exceeded thresh 10000 65 | \l*** Block Info *** 66 | \lgaussianss 4058f8 cmp 67 | \lgaussianss 4058fc b.le \ 68 | E:40595c "]; 69 | "BB 0x4058e4 - 0x4058f4 type: bl" -> "BB 0x4058f8 - 0x4058fc type: b.le" [label="{}"]; 70 | "BB 0x40593c - 0x405950 type: bl" [label="*** Reason for MS *** 71 | \l923583 has exceeded thresh 10000 72 | \l*** Block Info *** 73 | \lgaussianss 40593c fmul 74 | \lgaussianss 405940 mov 75 | \lgaussianss \ 76 | 405944 str 77 | \lgaussianss 405948 ldr 78 | \lgaussianss 40594c fcvt 79 | \lgaussianss 405950 bl E:405380 80 | \limsmooth"]; 81 | "BB 0x4058f8 - 0x4058fc type: b.le" -> "BB 0x40593c - 0x405950 type: bl" [label="{}"]; 82 | "BB 0x405954 - 0x405958 type: b.ne" [label="*** Reason for MS *** 83 | \l48297317 has exceeded thresh 10000 84 | \l*** Block Info *** 85 | \lgaussianss 405954 cmp 86 | \lgaussianss 405958 b.ne \ 87 | E:405918 "]; 88 | "BB 0x40593c - 0x405950 type: bl" -> "BB 0x405954 - 0x405958 type: b.ne" [label="{}"]; 89 | "BB 0x405954 - 0x405958 type: b.ne" -> "BB 0x40593c - 0x405950 type: bl" [label="{}"]; 90 | 
"BB 0x4059a8 - 0x4059b4 type: bl" [label="*** Reason for MS *** 91 | \l11583 has exceeded thresh 10000 92 | \l*** Block Info *** 93 | \lgaussianss 4059a8 fmov 94 | \lgaussianss 4059ac fmov 95 | \lgaussianss \ 96 | 4059b0 mov 97 | \lgaussianss 4059b4 bl E:400a70 98 | \lpow@plt"]; 99 | "BB 0x405954 - 0x405958 type: b.ne" -> "BB 0x4059a8 - 0x4059b4 type: bl" [label="{}"]; 100 | "BB 0x405a84 - 0x405a88 type: b.ne" [label="*** Reason for MS *** 101 | \l188391 has exceeded thresh 10000 102 | \l*** Block Info *** 103 | \lgaussianss 405a84 cmp 104 | \lgaussianss 405a88 b.ne \ 105 | E:405a48 "]; 106 | "BB 0x4059a8 - 0x4059b4 type: bl" -> "BB 0x405a84 - 0x405a88 type: b.ne" [label="{}"]; 107 | "BB 0x405a84 - 0x405a88 type: b.ne" -> "BB 0x4059a8 - 0x4059b4 type: bl" [label="{}"]; 108 | "BB 0x405a84 - 0x405a88 type: b.ne" -> "BB 0x405a84 - 0x405a88 type: b.ne" [label="{}"]; 109 | "BB 0x405aa8 - 0x405ad4 type: ret" [label="*** Reason for MS *** 110 | \l194533 has exceeded thresh 10000 111 | \l*** Block Info *** 112 | \lgaussianss 405aa8 mov 113 | \lgaussianss 405aac ldp 114 | \lgaussianss \ 115 | 405ab0 ldp 116 | \lgaussianss 405ab4 ldp 117 | \lgaussianss 405ab8 ldp 118 | \lgaussianss 405abc ldp 119 | \lgaussianss 405ac0 ldp 120 | \lgaussianss \ 121 | 405ac4 ldp 122 | \lgaussianss 405ac8 ldp 123 | \lgaussianss 405acc ldp 124 | \lgaussianss 405ad0 ldp 125 | \lgaussianss 405ad4 ret "]; 126 | "BB 0x405a84 - 0x405a88 type: b.ne" -> "BB 0x405aa8 - 0x405ad4 type: ret" [label="{}"]; 127 | } 128 | -------------------------------------------------------------------------------- /support/enable_arm_pmu.c: -------------------------------------------------------------------------------- 1 | /* 2 | Kernel-PMU 3 | Enabling user-mode access to the performance monitor unit (PMU) on ARMv8 Aarch64 and ARMv7 4 | Copyright (C) 2019 Bruno Pairault 5 | 6 | This program is free software: you can redistribute it and/or modify 7 | it under the terms of the GNU General Public License as published by 8 | 
the Free Software Foundation, either version 3 of the License, or 9 | (at your option) any later version. 10 | 11 | This program is distributed in the hope that it will be useful, 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | GNU General Public License for more details. 15 | 16 | You should have received a copy of the GNU General Public License 17 | along with this program. If not, see . 18 | */ 19 | 20 | /* Inspired with 21 | https://community.arm.com/dev-platforms/f/discussions/10366/help-configuring-pmu-s 22 | https://patchwork.kernel.org/patch/5217341/ 23 | */ 24 | 25 | /* Enable user-mode ARM performance counter access on ARMv7 & Armv8 Aarch64 */ 26 | #include 27 | #include 28 | #include 29 | 30 | 31 | #if !defined(__arm__) && !defined(__aarch64__) 32 | #error Module can only be compiled on ARM. 33 | #endif 34 | 35 | /* #define from /lib/modules/uname-r/source/arch/arm64/include/asm/perf_event.h in ASM Aarch 64 */ 36 | #define ARMV8_PMCR_MASK 0x3f 37 | #define ARMV8_PMCR_E (1 << 0) /* Enable all counters */ 38 | #define ARMV8_PMCR_P (1 << 1) /* Reset all counters */ 39 | #define ARMV8_PMCR_C (1 << 2) /* Cycle counter reset */ 40 | #define ARMV8_PMCR_N_MASK 0x1f 41 | 42 | #define ARMV8_PMUSERENR_EN_EL0 (1 << 0) /* EL0 access enable */ 43 | #define ARMV8_PMUSERENR_CR (1 << 2) /* Cycle counter read enable */ 44 | #define ARMV8_PMUSERENR_ER (1 << 3) /* Event counter read enable */ 45 | 46 | #define ARMV8_PMCNTENSET_EL0_ENABLE (1<<31) /* *< Enable Perf count reg */ 47 | 48 | #define PERF_DEF_OPTS (1 | 16) 49 | 50 | static inline u32 armv8pmu_read(void) 51 | { 52 | u64 val=0; 53 | asm volatile("MRS %0, pmcr_el0" : "=r" (val)); 54 | return (u32)val; 55 | } 56 | static inline void armv8pmu_write(u32 val) 57 | { 58 | val &= ARMV8_PMCR_MASK; 59 | asm volatile("isb" : : : "memory"); 60 | asm volatile("MSR pmcr_el0, %0" : : "r" ((u64)val)); 61 | } 62 | 63 | static void 64 | 
enable_cpu_counters(void* data) 65 | { 66 | printk(KERN_INFO "ENABLE_ARM_PMU enabling user PMU access on CPU-Core #%d", smp_processor_id()); 67 | #if __aarch64__ 68 | /* Enable user-mode access to counters. */ 69 | asm volatile("MSR pmuserenr_el0, %0" : : "r"((u64)ARMV8_PMUSERENR_EN_EL0|ARMV8_PMUSERENR_ER|ARMV8_PMUSERENR_CR)); 70 | /* Initialize & Reset PMNC: C and P bits. */ 71 | armv8pmu_write(ARMV8_PMCR_P | ARMV8_PMCR_C); 72 | asm volatile("MSR pmintenset_el1, %0" : : "r" ((u64)(0 << 31))); 73 | /* Count Enable Set register bit 31 enable */ 74 | asm volatile("MSR pmcntenset_el0, %0" : : "r" (ARMV8_PMCNTENSET_EL0_ENABLE)); 75 | armv8pmu_write(armv8pmu_read() | ARMV8_PMCR_E); 76 | 77 | #elif defined(__ARM_ARCH_7A__) 78 | 79 | /* Enable user-mode access to counters. */ 80 | asm volatile("MCR p15, 0, %0, c9, c14, 0" :: "r"(1)); 81 | /* Program PMU and enable all counters */ 82 | asm volatile("MCR p15, 0, %0, c9, c12, 0" :: "r"(PERF_DEF_OPTS)); 83 | asm volatile("MCR p15, 0, %0, c9, c12, 1" :: "r"(0x8000000f)); 84 | #else 85 | #error Module Does Not Support your ARM 86 | #endif 87 | } 88 | 89 | static void 90 | disable_cpu_counters(void* data) 91 | { 92 | printk(KERN_INFO "ENABLE_ARM_PMU disabling user PMU access on CPU-Core #%d", smp_processor_id()); 93 | 94 | #if __aarch64__ 95 | /* Performance Monitors Count Enable Set register bit 31:0 disable */ 96 | asm volatile("MSR pmcntenset_el0, %0" : : "r" (0<<31)); 97 | /* Disable all counters and user-mode access to counters. */ 98 | armv8pmu_write(armv8pmu_read() |~ ARMV8_PMCR_E); 99 | asm volatile("MSR pmuserenr_el0, %0" : : "r"((u64)0)); 100 | #elif defined(__ARM_ARCH_7A__) 101 | asm volatile("MCR p15, 0, %0, c9, c12, 0" :: "r"(0)); 102 | /* Disable all counters and user-mode access to counters. 
*/ 103 | asm volatile("MCR p15, 0, %0, c9, c12, 2" :: "r"(0x8000000f)); 104 | asm volatile("MCR p15, 0, %0, c9, c14, 0" :: "r"(0)); 105 | #else 106 | #error Module Does Not Support your ARM 107 | #endif 108 | } 109 | 110 | static int __init 111 | init(void) 112 | { 113 | on_each_cpu(enable_cpu_counters, NULL, 1); 114 | printk(KERN_INFO "ENABLE_ARM_PMU Initialized"); 115 | return 0; 116 | } 117 | 118 | static void __exit 119 | leave(void) 120 | { 121 | on_each_cpu(disable_cpu_counters, NULL, 1); 122 | printk(KERN_INFO "ENABLE_ARM_PMU Unloaded"); 123 | } 124 | 125 | MODULE_LICENSE("GPL"); 126 | MODULE_DESCRIPTION("Enables user-mode access to ARMv7A-v8 Aarch64 PMU counters"); 127 | MODULE_VERSION("1:0.0-dev"); 128 | module_init(init); 129 | module_exit(leave); -------------------------------------------------------------------------------- /paper_imp/tracer/src/etm.c: -------------------------------------------------------------------------------- 1 | #include "etm.h" 2 | 3 | static volatile uint8_t *prog_ctrl = CS_BASE + A53_0_ETM + TRCPRGCTLR; 4 | static volatile uint8_t *trace_status = CS_BASE + A53_0_ETM + TRCSTATR; 5 | static volatile uint32_t *addr_cmp_7 = CS_BASE + A53_0_ETM + TRCACVR7; 6 | static volatile uint32_t *addr_cmp_6 = CS_BASE + A53_0_ETM + TRCACVR6; 7 | static volatile uint32_t *addr_cmp_5 = CS_BASE + A53_0_ETM + TRCACVR5; 8 | static volatile uint32_t *addr_cmp_4 = CS_BASE + A53_0_ETM + TRCACVR4; 9 | static volatile uint32_t *addr_cmp_3 = CS_BASE + A53_0_ETM + TRCACVR3; 10 | static volatile uint32_t *addr_cmp_2 = CS_BASE + A53_0_ETM + TRCACVR2; 11 | static volatile uint32_t *addr_cmp_1 = CS_BASE + A53_0_ETM + TRCACVR1; 12 | static volatile uint32_t *addr_cmp_0 = CS_BASE + A53_0_ETM + TRCACVR0; 13 | 14 | static volatile uint32_t * etr_ctrl = CS_BASE + TMC3 + TMCTRG; 15 | static volatile uint32_t * etr_ffcr = CS_BASE + TMC3 + FFCR; 16 | 17 | volatile uint32_t * milestones = 0xfffc0000; 18 | uint32_t milestones_size = 0; 19 | uint32_t current_milestone 
= 0; 20 | uint32_t current_timestamp = 0; 21 | uint32_t milestone_graph [MSG_BUFFER_SIZE] = {0xffffffff}; 22 | uint32_t g_nominal_time = 0; 23 | uint32_t n_times[MS_LOG_SIZE] = {0}; 24 | uint32_t n_times_pt = 0; 25 | uint32_t tail_times[MS_LOG_SIZE] = {0}; 26 | uint32_t tail_times_pt = 0; 27 | uint32_t g_real_time = 0; 28 | uint32_t n_slack_ct = 0; 29 | uint32_t tail_violation_ct = 0; 30 | uint32_t p_slack_ct = 0; 31 | milestone_relay relay; 32 | milestone_relay tmp_relay; 33 | 34 | extern volatile uint8_t running; 35 | extern volatile uint32_t bandwidth_control; 36 | extern volatile float alpha; 37 | extern volatile float beta; 38 | extern uint32_t margin; 39 | uint32_t resumes[256] = {0}; 40 | uint32_t pauses[256] = {0}; 41 | uint32_t resume_pt=0; 42 | uint32_t pause_pt=0; 43 | 44 | void etm_reset() { 45 | print("RPU ETM config reset.\n\r"); 46 | milestones = 0xfffc0000; 47 | milestones_size = 0; 48 | current_milestone = 0; 49 | current_timestamp = 0; 50 | g_nominal_time = 0; 51 | n_times_pt = 0; 52 | tail_times_pt = 0; 53 | g_real_time = 0; 54 | n_slack_ct = 0; 55 | tail_violation_ct = 0; 56 | p_slack_ct = 0; 57 | resume_pt=0; 58 | pause_pt=0; 59 | 60 | int i; 61 | for(i=0; i< MSG_BUFFER_SIZE; i++){ 62 | milestone_graph[i] = 0xffffffff; 63 | } 64 | for(i=0; i g_nominal_time * alpha) { 130 | if(bandwidth_control==1) { 131 | pauses[pause_pt++] = n_times_pt - 1; 132 | bandwidth_control = 0; 133 | } 134 | } else if (g_real_time < g_nominal_time * alpha - margin) { 135 | if(bandwidth_control==0){ 136 | resumes[resume_pt++] = n_times_pt - 1; 137 | bandwidth_control = 1; 138 | } 139 | } 140 | // else if (g_real_time > relay.tail_t[i] * alpha ) { 141 | // if(bandwidth_control==1){ 142 | // pauses[pause_pt++] = n_times_pt - 1; 143 | // bandwidth_control = 0; 144 | // } 145 | // } 146 | break; 147 | } 148 | } 149 | for(j=0; j<4; j++){ 150 | uint32_t position = (relay.offset[i]) + 2 + 2 * j; 151 | uint32_t val = milestone_graph[position]; 152 | if (val != 0xffffffff) { 153 
| uint32_t new_address = milestone_graph[val/4]; 154 | uint32_t new_offset = val/4; 155 | tmp_relay.address[j] = new_address; 156 | tmp_relay.offset[j] = new_offset; 157 | tmp_relay.tail_t[j] = milestone_graph[new_offset + 1]; 158 | tmp_relay.nominal_t[j] = milestone_graph[position + 1]; 159 | set_addr_cmp(new_address, j); 160 | } else { 161 | break; 162 | } 163 | } 164 | relay.n_valid = j; 165 | etm_enable(); 166 | 167 | if (!relay.n_valid) { // this means the last milestone is reached 168 | running = 0; 169 | report_results(); 170 | } 171 | 172 | for(i=0;i {hex(v)}') 48 | edge_data = msg.get_edge_data(u,v) 49 | if 'measures' in edge_data: 50 | edge_data['measures'].append(measure) 51 | else: 52 | edge_data['measures'] = [measure] 53 | 54 | if 'tail_t' in msg.nodes[v]: 55 | cur_t = msg.nodes[v]['tail_t'] 56 | msg.nodes[v]['tail_t'] = g_time if g_time > cur_t else cur_t 57 | else: 58 | msg.nodes[v]['tail_t'] = g_time 59 | 60 | # if 'tail_t' in edge_data: 61 | # cur_t = edge_data['tail_t'] 62 | # edge_data['tail_t'] = g_time if g_time > cur_t else cur_t 63 | # else: 64 | # edge_data['tail_t'] = g_time 65 | 66 | 67 | return msg 68 | 69 | def gen_nominal(msg): 70 | for e in msg.edges: 71 | data = msg.get_edge_data(*e) 72 | if 'measures' in data: 73 | data['measures'] = max(data['measures']) 74 | else: 75 | data['measures'] = 0 76 | return msg 77 | 78 | def binarize_relative_tail(msg, output_name=None): 79 | ms_entry, _ = derive_end_point(msg) 80 | br = bytearray() 81 | nx.set_node_attributes(msg, False, 'mapped') 82 | g_offset = 0 83 | 84 | def core(ms, back_link, back_link_ms): 85 | nonlocal g_offset 86 | if msg.nodes[ms]['mapped']: 87 | assert back_link is not None, "Only entry has no back_link" 88 | struct.pack_into(' 8 | #include 9 | #include 10 | 11 | 12 | uint8_t n_mp; 13 | 14 | // requires 2 positional arguments: the number of active ETMs and the input file name 15 | void parse_args(int argc, char *argv[], uint8_t* n_mp) { 16 | if(argc != 3) { 17 | // 
printf("One positional argument required. i.e. the number of active ETMs\n"); 18 | printf("Usage %s \n", argv[0]); 19 | exit(1); 20 | } 21 | *n_mp = strtol(argv[1], NULL, 0); 22 | return; 23 | } 24 | 25 | /* By the convention in this project, the IDs of the ETM should be set from 1 to n 26 | Thus if the ID reported in the formatter is larger than that, print a warning message 27 | 28 | There are IDs used by CoreSight for other purpose. Thus it might not indicate a trace data corruption. This deformatter is not designed to handle such cases for now. 29 | */ 30 | void check_id(uint8_t id) { 31 | if (!(id <= n_mp)) { 32 | printf("detect ID %u, which is larger than the number of active ETMs. Check manual whether this is a valid ID, otherwise this signals trace data corruption.\n", id); 33 | } 34 | } 35 | 36 | /* 37 | The ID starts from 1 to 4. Thus when indexing, the position is id-1. 38 | The first ETM ID is 1 instead of 0, since potentially, the ID 0 might be reserved 39 | */ 40 | FILE* id2file(FILE** fps, int id) { 41 | return fps[id - 1]; 42 | } 43 | 44 | 45 | /* 46 | frame_buf is 16 bytes long. Only the entire 16bytes are recevied, the deformatting can start meaningfully. 47 | the cur_id is consistent with ETM ID. 48 | 49 | cur_id == 0 is reserved to indicate null. This could happen when trace end or flush. 50 | The data bytes associate with ID 0 are discarded. 
51 | */ 52 | void proc_frame(FILE** fps, uint8_t* frame_buf, int* cur_id) { 53 | int i; 54 | char aux = frame_buf[15]; 55 | for(i=0; i<8; i++) { 56 | if ( (frame_buf[i*2] & 0x1) && (aux & (0x1 << i)) ) { 57 | // new ID and the next byte corresponding to the old ID 58 | if(i==7) { 59 | printf("auxiliary fault!\n"); 60 | exit(0); 61 | } 62 | if (*cur_id != 0) { 63 | fwrite(&frame_buf[i*2 + 1], sizeof(uint8_t), 1, id2file(fps, *cur_id)); 64 | } 65 | *cur_id = (frame_buf[i*2] & 0xfe) >> 1; 66 | check_id(*cur_id); 67 | } else if ( (frame_buf[i*2] & 0x1) && !(aux & (0x1 << i)) ) { 68 | // new ID and the next byte corresponding to the new ID 69 | *cur_id = (frame_buf[i*2] & 0xfe) >> 1; 70 | check_id(*cur_id); 71 | if (*cur_id == 0) { 72 | continue; 73 | } 74 | if(i != 7) { 75 | fwrite(&frame_buf[i*2 + 1], sizeof(uint8_t), 1, id2file(fps, *cur_id)); 76 | } 77 | } else { 78 | // Data byte 79 | if (*cur_id == 0) { 80 | continue; 81 | } 82 | char dat = (frame_buf[i*2] & 0xfe) | ((aux & (0x1 << i)) >> i); 83 | FILE* tar_fp = id2file(fps, *cur_id); 84 | fwrite(&dat, sizeof(uint8_t), 1, tar_fp); 85 | if(i != 7) { 86 | fwrite(&frame_buf[i*2 + 1], sizeof(uint8_t), 1, tar_fp); 87 | } 88 | } 89 | } 90 | } 91 | 92 | void dat2out(char* ifname, char* ofname) { 93 | FILE* f1 = fopen(ifname, "rb"); 94 | FILE* f2 = fopen(ofname, "w"); 95 | 96 | uint32_t buf; 97 | int status; 98 | while(1) { 99 | status = fread(&buf, sizeof(uint32_t), 1, f1); 100 | if (status != 1) { 101 | break; 102 | } 103 | fprintf(f2, "0x%08X\n", buf); 104 | } 105 | fclose(f1); 106 | fclose(f2); 107 | } 108 | 109 | /* print the 16byte frame, with 4 bytes per-line in format of 0x%08X, so total of four lines */ 110 | void print_frame(uint8_t* frame_buf) { 111 | int i; 112 | for(i=0; i<4; i++) { 113 | printf("0x%02X%02X%02X%02X\n", frame_buf[i*4+3], frame_buf[i*4 + 2], frame_buf[i*4 + 1], frame_buf[i*4 + 0]); 114 | } 115 | } 116 | 117 | 118 | // requires two positional arguments: the number of active ETMs and the input 
file name 119 | int main(int argc, char *argv[]) { 120 | 121 | if (argc != 3) { 122 | printf("Usage %s \n", argv[0]); 123 | exit(1); 124 | } 125 | 126 | n_mp = strtol(argv[1], NULL, 0); 127 | char* fname = argv[2]; 128 | 129 | uint8_t frame_buf[16]; 130 | FILE* fp = fopen(fname, "rb"); 131 | int status; 132 | int cur_id = -1; 133 | 134 | // parse_args(argc, argv, &n_mp); 135 | FILE** fps = (FILE**) malloc(sizeof(FILE*) * n_mp); 136 | int i; 137 | for(i=0; i 2 | #include 3 | #include "cs_etm.h" 4 | #include "cs_soc.h" 5 | #include "zcu_cs.h" 6 | 7 | ETM_interface *etm; 8 | ETM_interface *etms[4]; 9 | Replicator_interface *replicator; 10 | Funnel_interface *funnel1; 11 | Funnel_interface *funnel2; 12 | TMC_interface *tmc1; 13 | TMC_interface *tmc2; 14 | TMC_interface *tmc3; 15 | 16 | void cs_config_etr(uint64_t buf_addr, uint32_t buf_size) { 17 | printf(" ------Coresight Configure Using ETR------\n"); 18 | printf(" Buffer Address: 0x%lx\n", buf_addr); 19 | printf(" Buffer Size : %d (bytes)\n", buf_size); 20 | printf(" -------------- End Info -----------------\n\n"); 21 | 22 | etm = (ETM_interface *) cs_register(A53_0_etm); 23 | replicator = (Replicator_interface *) cs_register(Replic); 24 | funnel1 = (Funnel_interface *) cs_register(Funnel1); 25 | funnel2 = (Funnel_interface *) cs_register(Funnel2); 26 | tmc1 = (TMC_interface *) cs_register(Tmc1); 27 | tmc2 = (TMC_interface *) cs_register(Tmc2); 28 | tmc3 = (TMC_interface *) cs_register(Tmc3); 29 | 30 | funnel_unlock(funnel1); 31 | funnel_unlock(funnel2); 32 | //funnel_config_port(funnel1, 0xf, 0); 33 | //funnel_config_port(funnel2, 0x4, 0); //0x4 is Lauterbach setting, for mp, try use all port 34 | funnel_config_port(funnel1, 0xff, 0); 35 | funnel_config_port(funnel2, 0xff, 0); 36 | 37 | tmc_unlock(tmc1); 38 | tmc_unlock(tmc2); 39 | tmc_unlock(tmc3); 40 | tmc_disable(tmc1); 41 | tmc_disable(tmc2); 42 | tmc_disable(tmc3); 43 | tmc_set_mode(tmc1, Hard); 44 | tmc_set_mode(tmc2, Hard); 45 | tmc_set_mode(tmc3, Circular); 
46 | 47 | 48 | 49 | tmc_set_axi(tmc3, 0xf); 50 | tmc_set_size(tmc3, buf_size); 51 | tmc_set_data_buf(tmc3, buf_addr); 52 | tmc_set_read_pt(tmc3, buf_addr); 53 | tmc_set_write_pt(tmc3, buf_addr); 54 | 55 | tmc_enable(tmc1); 56 | tmc_enable(tmc2); 57 | tmc_enable(tmc3); 58 | 59 | return ; 60 | } 61 | 62 | /* 63 | Registers and config necessary CS components for multiprocessor tracing 64 | */ 65 | void cs_config_etr_mp(uint64_t buf_addr, uint32_t buf_size) { 66 | printf(" ------Coresight Configure Using ETR------\n"); 67 | printf(" Buffer Address: 0x%lx\n", buf_addr); 68 | printf(" Buffer Size : %d (bytes)\n", buf_size); 69 | printf(" -------------- End Info -----------------\n\n"); 70 | 71 | etms[0] = (ETM_interface *) cs_register(A53_0_etm); 72 | etms[1] = (ETM_interface *) cs_register(A53_1_etm); 73 | etms[2] = (ETM_interface *) cs_register(A53_2_etm); 74 | etms[3] = (ETM_interface *) cs_register(A53_3_etm); 75 | replicator = (Replicator_interface *) cs_register(Replic); 76 | funnel1 = (Funnel_interface *) cs_register(Funnel1); 77 | funnel2 = (Funnel_interface *) cs_register(Funnel2); 78 | tmc1 = (TMC_interface *) cs_register(Tmc1); 79 | tmc2 = (TMC_interface *) cs_register(Tmc2); 80 | tmc3 = (TMC_interface *) cs_register(Tmc3); 81 | 82 | funnel_unlock(funnel1); 83 | funnel_unlock(funnel2); 84 | funnel_config_port(funnel1, 0xf, 0); 85 | funnel_config_port(funnel2, 0x4, 0); 86 | 87 | tmc_unlock(tmc1); 88 | tmc_unlock(tmc2); 89 | tmc_unlock(tmc3); 90 | tmc_disable(tmc1); 91 | tmc_disable(tmc2); 92 | tmc_disable(tmc3); 93 | tmc_set_mode(tmc1, Hard); 94 | tmc_set_mode(tmc2, Hard); 95 | tmc_set_mode(tmc3, Circular); 96 | tmc1->formatter_flush_ctrl = 0x1; 97 | tmc2->formatter_flush_ctrl = 0x1; 98 | tmc3->formatter_flush_ctrl = 0x1; 99 | tmc_set_axi(tmc3, 0xf); 100 | tmc_set_size(tmc3, buf_size); 101 | tmc_set_data_buf(tmc3, buf_addr); 102 | tmc_set_read_pt(tmc3, buf_addr); 103 | tmc_set_write_pt(tmc3, buf_addr); 104 | 105 | tmc_enable(tmc1); 106 | tmc_enable(tmc2); 107 | 
tmc_enable(tmc3); 108 | 109 | return ; 110 | } 111 | /* 112 | a generic etm config, most functions are disabled, non-invasive 113 | */ 114 | void config_etm() 115 | { 116 | etm_unlock(etm); 117 | etm_disable(etm); 118 | etm_reset(etm); 119 | etm_set_cid(etm); 120 | etm_set_stall(etm, 0); 121 | } 122 | 123 | 124 | /* 125 | stall = 0 for non-intrusive trace 126 | */ 127 | void config_etm_n(ETM_interface* etm_n, int stall, int id) 128 | { 129 | etm_unlock(etm_n); 130 | etm_disable(etm_n); 131 | etm_reset(etm_n); // reset would set id to 1 132 | etm_n->trace_id = id; // so assign a none conflict id 133 | etm_set_cid(etm_n); 134 | etm_set_stall(etm_n, stall); 135 | } 136 | 137 | void config_etm_addr_event_test(ETM_interface *etm, uint64_t addr1, uint64_t addr2, uint64_t addr3, uint64_t addr4) 138 | { 139 | etm_set_addr_cmp(etm, 0, addr1, 1); 140 | etm_set_addr_cmp(etm, 1, addr2, 1); 141 | etm_set_addr_cmp(etm, 2, addr3, 1); 142 | etm_set_addr_cmp(etm, 3, addr4, 1); 143 | 144 | etm_set_rs(etm, 2, Single_addr, 0,0,0,0); 145 | etm_set_rs(etm, 3, Single_addr, 1,0,0,0); 146 | etm_set_rs(etm, 4, Single_addr, 2,0,0,0); 147 | etm_set_rs(etm, 5, Single_addr, 3,0,0,0); 148 | 149 | etm_set_event_sel_0(etm, 2, 0); 150 | etm_set_event_sel_1(etm, 3, 0); 151 | etm_set_event_sel_2(etm, 4, 0); 152 | etm_set_event_sel_3(etm, 5, 0); 153 | 154 | etm_set_event_trc(etm, 0xf, 0); 155 | } 156 | 157 | void config_etm_single_pmu_event_test(ETM_interface *etm, int event_bus) 158 | { 159 | 160 | // use External Input Selector 0 to event_but 161 | etm_set_ext_input(etm, event_bus, 0); 162 | 163 | // use Resource Selector 2 to monitor External Input Selector 0, thus the second zero is ignored, as the group is not Counter_Seq 164 | // last two zeros means not inverse the resutls 165 | etm_set_rs(etm, 2, External_input, 0, 0, 0, 0); 166 | 167 | // Use sel_0 position in trace stream to indicate the firing of Resource Selector 2, the ending 0 means not using the Resource as a pair 168 | 
etm_set_event_sel_0(etm, 2, 0); 169 | 170 | // enable the LSB in the bit mask to allow ETM insert event to trace stream 171 | // ending 0 means do not insert atb trigger if applicable 172 | etm_set_event_trc(etm, 1, 0); 173 | 174 | } 175 | -------------------------------------------------------------------------------- /csc/src/zcu_cs.c: -------------------------------------------------------------------------------- 1 | #include "zcu_cs.h" 2 | #include "cs_soc.h" 3 | #include "cs_etm.h" 4 | #include "cs_pmu.h" 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | 14 | volatile void* cs_register(enum component comp) 15 | { 16 | volatile void* ptr = NULL; 17 | int fd = open("/dev/mem", O_RDWR | O_SYNC); 18 | if (fd < 0) { 19 | perror("Cannot open /dev/mem\n"); 20 | exit(1); 21 | } 22 | 23 | switch(comp) { 24 | case Funnel0: 25 | ptr = mmap(NULL, sizeof(Funnel_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + FUNNEL0); 26 | break; 27 | case Funnel1: 28 | ptr = mmap(NULL, sizeof(Funnel_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + FUNNEL1); 29 | break; 30 | case Funnel2: 31 | ptr = mmap(NULL, sizeof(Funnel_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + FUNNEL2); 32 | break; 33 | case Tmc1: 34 | ptr = mmap(NULL, sizeof(TMC_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + TMC1); 35 | break; 36 | case Tmc2: 37 | ptr = mmap(NULL, sizeof(TMC_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + TMC2); 38 | break; 39 | case Tmc3: 40 | ptr = mmap(NULL, sizeof(TMC_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + TMC3); 41 | break; 42 | case Replic: 43 | ptr = mmap(NULL, sizeof(Replicator_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + REPLIC); 44 | break; 45 | case Tpiu: 46 | printf("IMPORTANT NOTICE!\n"); 47 | printf("If you are trying to use TPIU, then on ZCU102/Kria, you need to connect jumper J88\n"); 48 | printf("Reference 
https://support.xilinx.com/s/article/66669?language=en_US\n"); 49 | ptr = mmap(NULL, sizeof(TPIU_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + TPIU); 50 | break; 51 | case Cti0: 52 | ptr = mmap(NULL, sizeof(CTI_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + CTI0); 53 | break; 54 | case Cti1: 55 | ptr = mmap(NULL, sizeof(CTI_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + CTI1); 56 | break; 57 | case Cti2: 58 | ptr = mmap(NULL, sizeof(CTI_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + CTI2); 59 | break; 60 | case A53_0_etm: 61 | ptr = mmap(NULL, sizeof(ETM_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + A53_0_ETM); 62 | break; 63 | case A53_0_pmu: 64 | ptr = mmap(NULL, sizeof(PMU_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + A53_0_PMU); 65 | break; 66 | case A53_0_cti: 67 | ptr = mmap(NULL, sizeof(CTI_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + A53_0_CTI); 68 | break; 69 | case A53_0_debug: 70 | ptr = mmap(NULL, sizeof(CTI_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + A53_0_DEBUG); 71 | break; 72 | case A53_1_etm: 73 | ptr = mmap(NULL, sizeof(ETM_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + A53_1_ETM); 74 | break; 75 | case A53_1_pmu: 76 | ptr = mmap(NULL, sizeof(PMU_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + A53_1_PMU); 77 | break; 78 | case A53_1_cti: 79 | ptr = mmap(NULL, sizeof(CTI_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + A53_1_CTI); 80 | break; 81 | case A53_1_debug: 82 | ptr = mmap(NULL, sizeof(CTI_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + A53_1_DEBUG); 83 | break; 84 | case A53_2_etm: 85 | ptr = mmap(NULL, sizeof(ETM_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + A53_2_ETM); 86 | break; 87 | case A53_2_pmu: 88 | ptr = mmap(NULL, sizeof(PMU_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + A53_2_PMU); 89 | break; 90 | case A53_2_cti: 91 
| ptr = mmap(NULL, sizeof(CTI_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + A53_2_CTI); 92 | break; 93 | case A53_2_debug: 94 | ptr = mmap(NULL, sizeof(CTI_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + A53_2_DEBUG); 95 | break; 96 | case A53_3_etm: 97 | ptr = mmap(NULL, sizeof(ETM_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + A53_3_ETM); 98 | break; 99 | case A53_3_pmu: 100 | ptr = mmap(NULL, sizeof(PMU_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + A53_3_PMU); 101 | break; 102 | case A53_3_cti: 103 | ptr = mmap(NULL, sizeof(CTI_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + A53_3_CTI); 104 | break; 105 | case A53_3_debug: 106 | ptr = mmap(NULL, sizeof(CTI_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + A53_3_DEBUG); 107 | break; 108 | case R5_0_cti: 109 | ptr = mmap(NULL, sizeof(CTI_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + R5_0_CTI); 110 | break; 111 | case R5_1_cti: 112 | ptr = mmap(NULL, sizeof(CTI_interface), PROT_READ | PROT_WRITE, MAP_SHARED, fd, CS_BASE + R5_1_CTI); 113 | break; 114 | default: 115 | fprintf(stderr, "Unimplemented component %d\n", comp); 116 | exit(1); 117 | break; 118 | } 119 | 120 | if (ptr == MAP_FAILED) 121 | fprintf(stderr,"mmap to component %d failed!\n", comp); 122 | close(fd); 123 | 124 | #ifdef DEBUG 125 | #endif 126 | 127 | return ptr; 128 | } 129 | -------------------------------------------------------------------------------- /paper_imp/tracee/main/start.cpp: -------------------------------------------------------------------------------- 1 | #include "buffer.h" 2 | #include "cs_etm.h" 3 | #include "cs_config.h" 4 | #include "cs_soc.h" 5 | #include "pmu_event.h" 6 | #include "zcu_cs.h" 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 
| 25 | using namespace std; 26 | 27 | extern ETM_interface *etm; 28 | 29 | tuple read_ms(string fname) 30 | { 31 | vector v; 32 | ifstream msfile(fname); 33 | string str; 34 | if (msfile.is_open()) 35 | { 36 | while (getline(msfile, str, ',')) 37 | { 38 | cout << str << endl; 39 | v.push_back(stoul(str, nullptr, 16)); 40 | } 41 | } 42 | else 43 | { 44 | cout << "Milestone file faied to open" << endl; 45 | } 46 | size_t num_ms = v.size(); 47 | uint32_t *ms = (uint32_t *)malloc(sizeof(uint32_t) * v.size()); 48 | for (size_t i = 0; i < num_ms; i++) 49 | { 50 | ms[i] = v[i]; 51 | } 52 | return make_pair(ms, num_ms); 53 | } 54 | 55 | int get_file_size(char *fname) 56 | { 57 | FILE *fp = fopen(fname, "rb"); 58 | int sz; 59 | fseek(fp, 0L, SEEK_END); 60 | sz = ftell(fp); 61 | return sz; 62 | } 63 | 64 | tuple read_msg(char *fname) 65 | { 66 | int size = get_file_size(fname); 67 | uint32_t *buf = (uint32_t *)malloc(size); 68 | FILE *fp = fopen(fname, "rb"); 69 | fread(buf, sizeof(uint32_t), size / sizeof(uint32_t), fp); 70 | return make_pair(buf, size / sizeof(uint32_t)); 71 | } 72 | 73 | void write_ms_time(uint32_t *ms, uint32_t *ms_time, int ms_size) 74 | { 75 | ofstream msfile("ms_timing.txt"); 76 | if (!msfile.is_open()) 77 | { 78 | cout << "ERROR: cannot open ms_timing.txt, timing info does not write to " 79 | "the file" 80 | << endl; 81 | return; 82 | } 83 | for (int i = 0; i < ms_size; i++) 84 | { 85 | msfile << "0x" << hex << ms[i]; 86 | if (i == ms_size - 1) 87 | { 88 | msfile << endl; 89 | } 90 | else 91 | { 92 | msfile << ","; 93 | } 94 | } 95 | for (int i = 0; i < ms_size; i++) 96 | { 97 | msfile << dec << ms_time[i]; 98 | if (i == ms_size - 1) 99 | { 100 | msfile << endl; 101 | } 102 | else 103 | { 104 | msfile << ","; 105 | } 106 | } 107 | cout << "Timing info write to ms_timing.txt" << endl; 108 | } 109 | 110 | void pmu_event_setup(unsigned int e0, unsigned int e1, unsigned int e2, unsigned int e3, unsigned int e4, unsigned int e5) { 111 | 
arm_perf_disable_counter(0x3f); 112 | arm_perf_set_ctrl(ARM_PERF_PMCR_P); 113 | arm_perf_type0(e0); 114 | arm_perf_type1(e1); 115 | arm_perf_type2(e2); 116 | arm_perf_type3(e3); 117 | arm_perf_type4(e4); 118 | arm_perf_type5(e5); 119 | } 120 | 121 | int main(int argc, char *argv[]) 122 | { 123 | 124 | // set up ETR buffer. R5 refer to RPU's TCM 125 | #ifdef R5 126 | uint64_t buf_addr = R5_0_ATCM + 0x8000; 127 | uint32_t buf_size = 8 * 1024 * 4; 128 | #else 129 | uint64_t buf_addr = 0xb0000000; 130 | uint32_t buf_size = 256 * 1024 * 1024; 131 | clear_buffer(buf_addr, buf_size); 132 | #endif 133 | 134 | char app[256]; 135 | char *app_farg = NULL; 136 | char milestone_path[256]; 137 | uint64_t start_addr=0; 138 | uint64_t end_addr=0; 139 | ms_t ms_mode; 140 | uint32_t *ms_ptr; 141 | uint32_t ms_size; 142 | 143 | parse_args(argc, argv, app, &app_farg, milestone_path, &ms_mode, &start_addr, &end_addr); 144 | 145 | if (ms_mode == SEQUENCE) 146 | { 147 | printf("Sequence milestone mode\n"); 148 | tie(ms_ptr, ms_size) = read_ms(milestone_path); 149 | } 150 | else if (ms_mode == GRAPH) 151 | { 152 | printf("Graph milestone mode\n"); 153 | tie(ms_ptr, ms_size) = read_msg(milestone_path); 154 | } 155 | 156 | // config Coresight infrascture 157 | cs_config_etr(buf_addr, buf_size); 158 | config_etm(); 159 | 160 | // child would execl the target application, parent would wait till finish and 161 | // collect the results 162 | pid_t pid = 0; 163 | pid = fork(); 164 | if (pid == 0) 165 | { 166 | 167 | // pin child to core 0 168 | cpu_set_t set; 169 | CPU_ZERO(&set); 170 | CPU_SET(0, &set); 171 | sched_setaffinity(0, sizeof(cpu_set_t), &set); 172 | sched_yield(); 173 | 174 | // ETM only trace child pid 175 | uint64_t child_pid = getpid(); 176 | etm_set_contextid_cmp(etm, (uint64_t)child_pid); 177 | 178 | if (start_addr != 0 && end_addr != 0) { 179 | etm_register_start_stop_addr(etm, start_addr, end_addr); 180 | } 181 | 182 | // Write MS data to OCM so that RPU can read 183 | int 
mem_fd = open("/dev/mem", O_RDWR | O_SYNC); 184 | uint32_t *ms_buff = 185 | (uint32_t *)mmap(0, getpagesize(), PROT_READ | PROT_WRITE, MAP_SHARED, 186 | mem_fd, 0xfffc0000); 187 | 188 | uint32_t i; 189 | for (i = 0; i < ms_size; ++i) 190 | ms_buff[2 + i] = ms_ptr[i]; 191 | ms_buff[1] = ms_size; 192 | ms_buff[0] = 0xdeadbeef; 193 | 194 | for (i = 0; i < 4; ++i) 195 | { 196 | if (ms_mode == SEQUENCE) 197 | { 198 | etm_register_range(etm, ms_ptr[i], ms_ptr[i], 1); 199 | } 200 | else if (ms_mode == GRAPH) 201 | { 202 | etm_register_range(etm, 0, 0, 1); 203 | // etm_register_single_addr_match_event(etm, milestones[i]); 204 | } 205 | } 206 | printf("Driver finished config, wait for Tracer init...\n"); 207 | sleep(2); 208 | 209 | // set PMU event, but do not start counting. The counting should start by RPU responding to the 1st MS hit. 210 | //pmu_event_setup(ARM_PERF_EVENT_DC2W, ARM_PERF_EVENT_DC2R, 0,0,0,0); 211 | //arm_perf_enable_counter(3); 212 | //arm_perf_set_ctrl(ARM_PERF_PMCR_E); 213 | 214 | // enable ETM and run the application 215 | munmap((uint32_t*)0xfffc0000, getpagesize()); 216 | etm_enable(etm); 217 | //int test0,test1; 218 | //test0 = arm_perf_counter0(); 219 | //test1 = arm_perf_counter1(); 220 | //printf("test counter %d %d \n", test0, test1); 221 | execl(app, app, app_farg, NULL); 222 | fprintf(stderr, "ERROR: execl failed.\n"); 223 | } 224 | else if (pid > 0) 225 | { 226 | wait(NULL); 227 | sleep(1); 228 | etm_disable(etm); 229 | 230 | // int mem_fd = open("/dev/mem", O_RDWR | O_SYNC); 231 | // uint32_t *ms_buff = 232 | // (uint32_t *)mmap(0, getpagesize(), PROT_READ | PROT_WRITE, MAP_SHARED, 233 | // mem_fd, 0xfffc0000); 234 | // ms_buff = &ms_buff[2]; 235 | 236 | // int i = 0; 237 | // while (ms_buff[i] != 0) 238 | // { 239 | // printf("RPU: %d: %u\n\r", i, ms_buff[i]); 240 | // i++; 241 | // } 242 | 243 | // write_ms_time(ms_ptr, ms_buff, ms_size); 244 | dump_buffer(buf_addr, buf_size); 245 | #ifdef R5 246 | system("sed -i 
's/0xDEADBEEF/0x00000000/g' ../output/trace_1.out"); 247 | #endif 248 | 249 | return 0; 250 | } 251 | else 252 | { 253 | perror("Fork failed\n"); 254 | return 1; 255 | } 256 | 257 | return 0; 258 | } 259 | -------------------------------------------------------------------------------- /paper_imp/cfg/tracer.py: -------------------------------------------------------------------------------- 1 | from as_cf_utils import BR_INS, UB_INS, CB_INS, BL_INS, RET_INS, ALL_BRANCH_INS 2 | from as_cf_utils import read_as, parse_section 3 | from as_cf_utils import inside 4 | from basic_block import Lean_BB 5 | 6 | class Tracer: 7 | 8 | def __init__(self, cfg, strip_trace, limit=-1, watch_points = None, trace_range=None): 9 | self.history = [] 10 | self.strip_trace = strip_trace 11 | self.cfg = cfg 12 | self.cur_bb = None 13 | self.trace_range = trace_range 14 | self.stat = {'trace_on_cnt': 0, 'async_cnt': 0, 'interrupt_cnt': 0} 15 | self.watch_points = watch_points 16 | self.watch_points_history = [] 17 | 18 | if trace_range is None: 19 | if watch_points is None: 20 | self.world_line(limit=limit) 21 | else: 22 | self.world_line_watch(limit=limit) 23 | else: 24 | self.world_line_addr(limit=limit, trace_range = trace_range) 25 | self.acc_history = self.get_accumulate_history() 26 | self.rt_access = self.get_rt_access() 27 | self.cur_elapse = (0, {}) 28 | 29 | ## TODO: to save a rt call dict for all bb access is too expensive. We only need the instructions in between 30 | ## thus make a ongoing counting field, whenever a MS is placed, attach this field to the record BB. 
# The definitions below are methods of Tracer; they all take the Tracer instance as `self`.

def proc(self, e, i):
    """Consume one stripped trace element and update the tracer state.

    e: the element text, e.g. 'A0x400123' (address), 'BE' / 'BN' (branch
       atoms), 'S' (async), 'O' (trace on), 'I:...' (interrupt), 'IR', 'X'.
    i: index of the element in the trace file (not used here).

    Returns the element's symbol for 'S', 'O', 'I:...', 'IR' and 'X', and
    None otherwise. A blank element is ignored. An unrecognised element is
    printed and raises AssertionError.
    """
    if not e:
        # blank line in the trace file: nothing to decode
        return None

    if e[0] == 'S':  # Async
        self.stat['async_cnt'] += 1
        self.cur_bb = None
        return 'S'

    if e[0] == 'A':  # Address
        self.cur_bb = self.cfg.find_bb(int(e[1:], 16))
        # recorded even when no basic block contains the address (None)
        self.history.append(self.cur_bb)
        if self.cur_bb is not None:
            self.cur_bb.total_hit += 1
        return None

    if e == 'BE':
        if self.cur_bb is not None:
            if self.cur_bb.content[-1].ins in ('ret', 'br', 'blr'):
                # these blocks are left untouched; no successor is followed
                return None
            self.cur_bb = self.cur_bb.e_succ_bb
            self.cur_bb.total_hit += 1
            self.history.append(self.cur_bb)
        return None

    if e == 'BN':
        if self.cur_bb is not None:
            dbg_tmp = self.cur_bb
            try:
                self.cur_bb = self.cur_bb.n_succ_bb
                self.cur_bb.total_hit += 1
            except AttributeError:
                print(self.history[-10:])
                print(f'At {dbg_tmp} atom N received!')
                exit()
            self.history.append(self.cur_bb)
        return None

    if e == 'O':  # Trace On
        self.stat['trace_on_cnt'] += 1
        self.cur_bb = None
        return 'O'

    if e == '>':  # trace start
        self.cur_bb = None
        return None

    if e[:2] == 'I:':  # interrupt
        self.stat['interrupt_cnt'] += 1
        self.cur_bb = None
        return e

    if e == 'IR':
        # cur_bb is deliberately kept: ETM emits the return address first,
        # then the IR packet
        return 'IR'

    if e == 'X':
        self.cur_bb = None
        print('OVERFLOW')
        return 'X'

    print(e)
    # raised explicitly so the check survives `python -O`
    raise AssertionError(f'unknown trace element {e!r}')


def world_line_addr(self, limit=-1, trace_range=None):
    """Replay the trace between two addresses, in chronological order.

    limit: stop before the element with this index (-1 means no limit).
    trace_range: (start_addr, end_addr); falls back to self.trace_range when
        omitted. Replay starts at the address element equal to start_addr
        and stops once the current basic block contains end_addr.

    Returns self.history. Raises ValueError when no range is available.
    """
    if trace_range is None:
        trace_range = self.trace_range
    if not trace_range:
        raise ValueError('world_line_addr requires a (start_addr, end_addr) trace range')
    start_addr, end_addr = trace_range

    start_token = f'A{hex(start_addr)}'  # loop-invariant, built once
    start_flag = False
    with open(self.strip_trace, 'r') as f:
        for i, line in enumerate(f):
            if i == limit:
                break

            token = line.strip()
            if token == start_token:
                start_flag = True
            if not start_flag:
                continue

            self.proc(token, i)
            if self.cur_bb and self.cur_bb.has_addr(end_addr):
                print(f'exit at {hex(end_addr)}')
                break

    if not start_flag:
        print(f'failed to find entry point {hex(start_addr)}. It might not be in address packet')
    print('World line trace done')
    return self.history


def world_line(self, limit=-1):
    """Replay the whole trace file (up to `limit` elements) and return self.history."""
    with open(self.strip_trace, 'r') as f:
        for i, line in enumerate(f):
            if i == limit:
                break
            self.proc(line.strip(), i)

    print('World line trace done')
    return self.history


def world_line_watch(self, limit=-1, debug=False):
    """Replay the trace and log every visit to a watched basic block.

    Each time the current block is in self.watch_points, the pair
    (block, element index) is appended to self.watch_points_history. With
    debug=True the symbols returned by proc() are appended there as well.

    Returns self.history.
    """
    with open(self.strip_trace, 'r') as f:
        for i, line in enumerate(f):
            if i == limit:
                break

            symbol = self.proc(line.strip(), i)
            if debug and symbol:
                self.watch_points_history.append(symbol)

            if self.cur_bb and self.cur_bb in self.watch_points:
                self.watch_points_history.append((self.cur_bb, i))

    print('World line trace done')
    return self.history


def get_accumulate_history(self):
    """Return {basic block: number of times it appears in self.history}.

    Every block of self.cfg.bbs is present, with 0 when it was never
    visited; None entries of the history are skipped.
    """
    acc_history = dict.fromkeys(self.cfg.bbs, 0)
    for bb in self.history:
        if bb is not None:
            acc_history[bb] += 1
    print('accumulated history done')
    return acc_history


def get_rt_access(self):
    """Return {routine: number of `bl`-terminated blocks that branch to it}.

    Each block in self.acc_history whose last instruction is `bl` adds one
    to the routine of its explicit successor; the block's own hit count is
    not taken into account.
    """
    rt_access = dict.fromkeys(self.cfg.routines, 0)
    for bb in self.acc_history:
        if bb.content[-1].ins == 'bl':
            rt_access[bb.e_succ_bb.rt] += 1
    print('rt access done')
    return rt_access


def bb_visited(self, bb):
    """Return True when `bb` appears in the recorded history."""
    return bb in self.history


def history_viewer(self):
    pt = 0
173 | print(f'Total records: {len(self.history)}') 174 | print(f'cur bb: {self.history[pt]}') 175 | while(True): 176 | op = input() 177 | if op=='n' or op=='': 178 | pt += 1 179 | elif op=='p': 180 | pt -= 1 181 | elif op=='info': 182 | print(f'Total {len(self.history)}') 183 | else: 184 | pt = int(op) 185 | 186 | print(f'pt {pt}: {self.history[pt]}') 187 | 188 | 189 | 190 | -------------------------------------------------------------------------------- /paper_imp/cfg/lean_cfg.py: -------------------------------------------------------------------------------- 1 | import os 2 | import traceback 3 | import argparse 4 | import networkx as nx 5 | import pygraphviz as pgv 6 | 7 | from colorama import Fore 8 | from colorama import Style 9 | from matplotlib import pyplot as plt 10 | from as_cf_utils import BR_INS, UB_INS, CB_INS, BL_INS, RET_INS, ALL_BRANCH_INS, DUMMY_INS 11 | from as_cf_utils import read_as, parse_section 12 | from as_cf_utils import inside 13 | from basic_block import Lean_BB 14 | from tracer import Tracer 15 | 16 | class CFGLean: 17 | 18 | def __init__(self, fname, section='.text') -> None: 19 | """ routine refers to a function or subroutine 20 | routine.content can access individual line assembly code 21 | each assembly is represented by INS object 22 | """ 23 | 24 | print(f'CFGLean initialized') 25 | print(f'Input : {fname}') 26 | print(f'section: {section}') 27 | 28 | self.section = read_as(fname, section=section) 29 | self.routines = parse_section(self.section) 30 | self.addr_continuity_check() ## some binary has holes, e.g. discontinuity in the objdump file address, this function would raise a warning, in practice, this is not a problem. 31 | self.bbs = self.blockorize() ## blockorize generates basic blocks 32 | self.plain_link() ## upon blockorization, the branching info is missing. plain_link would link blocks together based on their branching info. 
This would however, not link `ret` nor `br` nor `blr` as these are not infereable statically 33 | self.tunnel_link() ## create a link from a `bl` basic block to the basic block at (address + 0x4). Which means each bl BB has two out edge. One for the true branch address, i.e. the address where bl branch to. One for the logical return address, i.e. addressOf(bl) + 0x4 34 | self.natural_link() 35 | self.oedge_ct_renew() 36 | 37 | from _cfg_solver import solve_rt, gather_rt_stat, get_milestones, get_sub_milestones, get_imm_milestone, produce_chain 38 | from _cfg_graph import to_graph, nxg2pgv, nxg2pgv_msg, visual_pgv 39 | from _cfg_rt_eval import rt_eval, gather_rts, attach_weight, special_rt, calc_smallest_weight, info_weight_path 40 | 41 | ##################### 42 | # utils (invariance)# 43 | ##################### 44 | 45 | def all_asm_iter(self): 46 | for r in self.routines: 47 | for i in r.content: 48 | yield i 49 | 50 | def find_bb(self, addr): 51 | low = 0 52 | high = len(self.bbs) - 1 53 | while (low <= high): 54 | mid = (low + high) >> 1 55 | if (self.bbs[mid].has_addr(addr)): 56 | return self.bbs[mid] 57 | elif (addr < self.bbs[mid].content[0].addr): 58 | high = mid - 1 59 | else: 60 | low = mid + 1 61 | return None 62 | 63 | def find_rt_bb(self, r_name): 64 | rt = self.find_routine(r_name) 65 | return list(bb for bb in self.bbs if bb.rt is rt) 66 | 67 | def find_routine(self, r_name): 68 | if type(r_name) is not str: 69 | r_name = r_name.name_strip 70 | for r in self.routines: 71 | if r.name == '<'+r_name+'>': 72 | return r 73 | assert False, f'{r_name} failed to be found in CFG' 74 | 75 | def addr_continuity_check(self): 76 | cur_end = self.routines[0].content[-1].addr 77 | for r in self.routines[1:]: 78 | try: 79 | assert cur_end + 4 == r.content[0].addr, "assembly has holes!" 
80 | except AssertionError: 81 | print(f'WARNING: assembly has holes {hex(cur_end)}, be cautious if this is in user define routine') 82 | cur_end = r.content[-1].addr 83 | 84 | 85 | ##################### 86 | # initialization # 87 | ##################### 88 | 89 | def blockorize(self): 90 | just_close = True 91 | bbs, content = [], [] 92 | for inst in self.all_asm_iter(): 93 | if just_close and inst.ins in DUMMY_INS: 94 | continue 95 | just_close = False 96 | content.append(inst) 97 | if inst.ins in ALL_BRANCH_INS: 98 | bbs.append(Lean_BB(content)) 99 | content = [] 100 | just_close = True 101 | return bbs 102 | 103 | #################################### 104 | # link methods (part of init) # 105 | #################################### 106 | 107 | def plain_link(self): 108 | """ Link addr explicit BB branch, this excludes ret and br """ 109 | for bb in self.bbs: 110 | if bb.content[-1].esuccessor_addr: 111 | e_bb = self.find_bb(bb.content[-1].esuccessor_addr) 112 | assert e_bb is not None, "plain link e_bb" 113 | bb.update_link_e_to(e_bb) 114 | if bb.content[-1].is_cb: 115 | n_bb = self.find_bb(bb.content[-1].addr + 4) 116 | assert n_bb is not None, "plain link check" 117 | bb.update_link_n_to(n_bb) 118 | 119 | def tunnel_link(self): 120 | """ Create a tunnel between bl and returning bb""" 121 | for bb in self.bbs: 122 | if bb.content[-1].is_link: 123 | ret_bb = self.find_bb(bb.content[-1].addr + 4) 124 | if ret_bb is None: 125 | offend_asm = bb.content[-1].raw_line.strip() 126 | print(f'{Fore.YELLOW}{offend_asm} has no return BB.') 127 | print(f'Ignore this message if it\' a routine call to {Style.RESET_ALL}') 128 | continue 129 | bb.update_link_ret_to(ret_bb) 130 | 131 | def natural_link(self): 132 | for bb in self.bbs: 133 | bb.update_natural_succ() 134 | 135 | def oedge_ct_renew(self): 136 | for bb in self.bbs: 137 | bb.update_n_oedge() 138 | 139 | 140 | 141 | if __name__ == '__main__': 142 | import os 143 | 144 | parser = 
argparse.ArgumentParser(description='Analysis an objdump file') 145 | parser.add_argument('fname', type=str) 146 | parser.add_argument('--graph', action='store_true') 147 | parser.add_argument('-caller', type=str) 148 | args = parser.parse_args() 149 | 150 | fpath = f'../demo/application/{args.fname}.dp' 151 | if not os.path.exists(fpath): 152 | print(f'Require {args.fname}.dp presenting in the demo/application directory.') 153 | print(f'The .dp files have to be generated by specific objdump. See README') 154 | exit() 155 | 156 | cfg = CFGLean(fpath, section='all') 157 | 158 | if(args.graph): 159 | os.makedirs(args.fname, exist_ok=True) 160 | for rt in cfg.routines: 161 | rt_name = rt.name[1:-1] 162 | rt_stat = cfg.solve_rt(rt_name) 163 | cfg.visual_pgv(rt_stat, args.fname) 164 | print(f'Output visualization in {args.fname}') 165 | 166 | if(args.caller): 167 | l = [] 168 | for bb in cfg.bbs: 169 | for asm in bb.content: 170 | if(asm.ins=='bl' and asm.to_rt_name == f'<{args.caller}>'): 171 | l.append(asm.addr) 172 | l.sort() 173 | l = list(hex(addr) for addr in l) 174 | l = ','.join(l) 175 | with open('ms.txt', 'w') as f: 176 | f.write(l) 177 | 178 | 179 | 180 | 181 | 182 | -------------------------------------------------------------------------------- /paper_imp/demo/milestone_graphs/mg/disparity.tmsg.dot: -------------------------------------------------------------------------------- 1 | digraph "" { 2 | node [label="\N", 3 | shape=record, 4 | style=filled 5 | ]; 6 | "BB 0x404620 - 0x40465c type: bl" [label="*** Reason for MS *** 7 | \lEntry 8 | \l*** Block Info *** 9 | \lgetDisparity 404620 stp 10 | \lgetDisparity 404624 mov 11 | \lgetDisparity 404628 \ 12 | ldr 13 | \lgetDisparity 40462c stp 14 | \lgetDisparity 404630 mov 15 | \lgetDisparity 404634 stp 16 | \lgetDisparity 404638 ldp 17 | \lgetDisparity \ 18 | 40463c stp 19 | \lgetDisparity 404640 stp 20 | \lgetDisparity 404644 mov 21 | \lgetDisparity 404648 stp 22 | \lgetDisparity 40464c mov 23 | 
\lgetDisparity \ 24 | 404650 mov 25 | \lgetDisparity 404654 mov 26 | \lgetDisparity 404658 mov 27 | \lgetDisparity 40465c bl E:401ec8 28 | \lfSetArray"]; 29 | "BB 0x404660 - 0x404670 type: bl" [label="*** Reason for MS *** 30 | \l408506 has exceeded thresh 10000 31 | \l*** Block Info *** 32 | \lgetDisparity 404660 mov 33 | \lgetDisparity 404664 \ 34 | mov 35 | \lgetDisparity 404668 str 36 | \lgetDisparity 40466c mov 37 | \lgetDisparity 404670 bl E:402180 38 | \liSetArray"]; 39 | "BB 0x404620 - 0x40465c type: bl" -> "BB 0x404660 - 0x404670 type: bl" [label="{}"]; 40 | "BB 0x404674 - 0x404688 type: bl" [label="*** Reason for MS *** 41 | \l408481 has exceeded thresh 10000 42 | \l*** Block Info *** 43 | \lgetDisparity 404674 mov 44 | \lgetDisparity 404678 \ 45 | add 46 | \lgetDisparity 40467c mov 47 | \lgetDisparity 404680 asr 48 | \lgetDisparity 404684 mov 49 | \lgetDisparity 404688 bl E:402180 50 | \liSetArray"]; 51 | "BB 0x404660 - 0x404670 type: bl" -> "BB 0x404674 - 0x404688 type: bl" [label="{}"]; 52 | "BB 0x4046a8 - 0x4046b4 type: bl" [label="*** Reason for MS *** 53 | \l705213 has exceeded thresh 10000 54 | \l*** Block Info *** 55 | \lgetDisparity 4046a8 mov 56 | \lgetDisparity 4046ac \ 57 | mov 58 | \lgetDisparity 4046b0 mov 59 | \lgetDisparity 4046b4 bl E:404ba0 60 | \lpadarray2"]; 61 | "BB 0x404674 - 0x404688 type: bl" -> "BB 0x4046a8 - 0x4046b4 type: bl" [label="{}"]; 62 | "BB 0x4046b8 - 0x4046d4 type: bl" [label="*** Reason for MS *** 63 | \l705429 has exceeded thresh 10000 64 | \l*** Block Info *** 65 | \lgetDisparity 4046b8 mov 66 | \lgetDisparity 4046bc \ 67 | ldp 68 | \lgetDisparity 4046c0 mov 69 | \lgetDisparity 4046c4 ldr 70 | \lgetDisparity 4046c8 mov 71 | \lgetDisparity 4046cc mov 72 | \lgetDisparity \ 73 | 4046d0 mov 74 | \lgetDisparity 4046d4 bl E:401ec8 75 | \lfSetArray"]; 76 | "BB 0x4046a8 - 0x4046b4 type: bl" -> "BB 0x4046b8 - 0x4046d4 type: bl" [label="{}"]; 77 | "BB 0x4046d8 - 0x4046e8 type: bl" [label="*** Reason for MS *** 78 | \l421972 
has exceeded thresh 10000 79 | \l*** Block Info *** 80 | \lgetDisparity 4046d8 str 81 | \lgetDisparity 4046dc \ 82 | movi 83 | \lgetDisparity 4046e0 mov 84 | \lgetDisparity 4046e4 mov 85 | \lgetDisparity 4046e8 bl E:401ec8 86 | \lfSetArray"]; 87 | "BB 0x4046b8 - 0x4046d4 type: bl" -> "BB 0x4046d8 - 0x4046e8 type: bl" [label="{}"]; 88 | "BB 0x4046ec - 0x4046fc type: bl" [label="*** Reason for MS *** 89 | \l421701 has exceeded thresh 10000 90 | \l*** Block Info *** 91 | \lgetDisparity 4046ec str 92 | \lgetDisparity 4046f0 \ 93 | sub 94 | \lgetDisparity 4046f4 str 95 | \lgetDisparity 4046f8 sub 96 | \lgetDisparity 4046fc bl E:401c70 97 | \lfMallocHandle"]; 98 | "BB 0x4046d8 - 0x4046e8 type: bl" -> "BB 0x4046ec - 0x4046fc type: bl" [label="{}"]; 99 | "BB 0x404714 - 0x40471c type: b.le" [label="*** Reason for MS *** 100 | \l422219 has exceeded thresh 10000 101 | \l*** Block Info *** 102 | \lgetDisparity 404714 mov 103 | \lgetDisparity 404718 \ 104 | cmp 105 | \lgetDisparity 40471c b.le E:404768 "]; 106 | "BB 0x4046ec - 0x4046fc type: bl" -> "BB 0x404714 - 0x40471c type: b.le" [label="{}"]; 107 | "BB 0x404740 - 0x40475c type: bl" [label="*** Reason for MS *** 108 | \l1710625 has exceeded thresh 10000 109 | \l*** Block Info *** 110 | \lgetDisparity 404740 ldr 111 | \lgetDisparity 404744 \ 112 | mov 113 | \lgetDisparity 404748 mov 114 | \lgetDisparity 40474c mov 115 | \lgetDisparity 404750 mov 116 | \lgetDisparity 404754 mov 117 | \lgetDisparity \ 118 | 404758 add 119 | \lgetDisparity 40475c bl E:404b18 120 | \lfindDisparity"]; 121 | "BB 0x404714 - 0x40471c type: b.le" -> "BB 0x404740 - 0x40475c type: bl" [label="{}"]; 122 | "BB 0x404760 - 0x404764 type: b.ne" [label="*** Reason for MS *** 123 | \l564484 has exceeded thresh 10000 124 | \l*** Block Info *** 125 | \lgetDisparity 404760 cmp 126 | \lgetDisparity 404764 \ 127 | b.ne E:404720 "]; 128 | "BB 0x404740 - 0x40475c type: bl" -> "BB 0x404760 - 0x404764 type: b.ne" [label="{}"]; 129 | "BB 0x404760 - 0x404764 type: b.ne" 
-> "BB 0x404740 - 0x40475c type: bl" [label="{}"]; 130 | "BB 0x404770 - 0x404774 type: bl" [label="*** Reason for MS *** 131 | \l44201 has exceeded thresh 10000 132 | \l*** Block Info *** 133 | \lgetDisparity 404770 ldr 134 | \lgetDisparity 404774 bl \ 135 | E:403600 136 | \lfFreeHandle"]; 137 | "BB 0x404760 - 0x404764 type: b.ne" -> "BB 0x404770 - 0x404774 type: bl" [label="{}"]; 138 | "BB 0x404778 - 0x40477c type: bl" [label="*** Reason for MS *** 139 | \l43695 has exceeded thresh 10000 140 | \l*** Block Info *** 141 | \lgetDisparity 404778 ldr 142 | \lgetDisparity 40477c bl \ 143 | E:403600 144 | \lfFreeHandle"]; 145 | "BB 0x404770 - 0x404774 type: bl" -> "BB 0x404778 - 0x40477c type: bl" [label="{}"]; 146 | "BB 0x404780 - 0x404784 type: bl" [label="*** Reason for MS *** 147 | \l68592 has exceeded thresh 10000 148 | \l*** Block Info *** 149 | \lgetDisparity 404780 ldr 150 | \lgetDisparity 404784 bl \ 151 | E:403600 152 | \lfFreeHandle"]; 153 | "BB 0x404778 - 0x40477c type: bl" -> "BB 0x404780 - 0x404784 type: bl" [label="{}"]; 154 | "BB 0x404788 - 0x40478c type: bl" [label="*** Reason for MS *** 155 | \l70868 has exceeded thresh 10000 156 | \l*** Block Info *** 157 | \lgetDisparity 404788 ldr 158 | \lgetDisparity 40478c bl \ 159 | E:403610 160 | \liFreeHandle"]; 161 | "BB 0x404780 - 0x404784 type: bl" -> "BB 0x404788 - 0x40478c type: bl" [label="{}"]; 162 | "BB 0x404798 - 0x40479c type: bl" [label="*** Reason for MS *** 163 | \l68952 has exceeded thresh 10000 164 | \l*** Block Info *** 165 | \lgetDisparity 404798 mov 166 | \lgetDisparity 40479c bl \ 167 | E:403610 168 | \liFreeHandle"]; 169 | "BB 0x404788 - 0x40478c type: bl" -> "BB 0x404798 - 0x40479c type: bl" [label="{}"]; 170 | "BB 0x4047a0 - 0x4047a4 type: bl" [label="*** Reason for MS *** 171 | \l68778 has exceeded thresh 10000 172 | \l*** Block Info *** 173 | \lgetDisparity 4047a0 mov 174 | \lgetDisparity 4047a4 bl \ 175 | E:403610 176 | \liFreeHandle"]; 177 | "BB 0x404798 - 0x40479c type: bl" -> "BB 
0x4047a0 - 0x4047a4 type: bl" [label="{}"]; 178 | "BB 0x4047a8 - 0x4047c4 type: ret" [label="*** Reason for MS *** 179 | \l68089 has exceeded thresh 10000 180 | \l*** Block Info *** 181 | \lgetDisparity 4047a8 mov 182 | \lgetDisparity 4047ac ldp 183 | \lgetDisparity \ 184 | 4047b0 ldp 185 | \lgetDisparity 4047b4 ldp 186 | \lgetDisparity 4047b8 ldp 187 | \lgetDisparity 4047bc ldp 188 | \lgetDisparity 4047c0 ldp 189 | \lgetDisparity \ 190 | 4047c4 ret "]; 191 | "BB 0x4047a0 - 0x4047a4 type: bl" -> "BB 0x4047a8 - 0x4047c4 type: ret" [label="{}"]; 192 | } 193 | --------------------------------------------------------------------------------