├── data └── locations_3_ref.txt ├── example ├── peakLocs_out.txt ├── locations_3_ref.txt ├── csynth.tcl ├── peakPicker_tb.m ├── peakPicker.hpp ├── peakPicker.m ├── peakPicker.cpp.backup ├── peakPicker.cpp ├── peakPicker_optimized.cpp ├── peakPicker_tb.cpp ├── Makefile └── vitis_hls.log ├── implementations └── peakPicker │ ├── csim.tcl │ ├── cosim.tcl │ ├── csynth.tcl │ ├── impl.tcl │ ├── export.tcl │ ├── performance_metrics.md │ ├── debug_reports │ └── peakPicker_debug_data_20250406_150212.json │ ├── peakPicker.hpp │ ├── peakPicker.cpp │ ├── Makefile │ ├── peakPicker_tb.cpp │ └── llm_response.md ├── requirements.txt ├── prompt_feedback.json ├── .gitignore ├── prompts ├── hls_conversion.md ├── hls_debugging.md ├── hls_generation.md ├── performance_optimization.md ├── documentation_template.md ├── readme_generation.md └── paper_generation.md ├── algorithms ├── peakPicker_tb.m └── peakPicker.m ├── workflows └── default_workflow.json ├── scripts ├── validate_report_parsing.py ├── Makefile └── optimize_hls_code.py ├── CLAUDE.md ├── readme.md └── agent_log.txt /data/locations_3_ref.txt: -------------------------------------------------------------------------------- 1 | 4806 2 | -------------------------------------------------------------------------------- /example/peakLocs_out.txt: -------------------------------------------------------------------------------- 1 | 4806 2 | -------------------------------------------------------------------------------- /example/locations_3_ref.txt: -------------------------------------------------------------------------------- 1 | 4806 2 | -------------------------------------------------------------------------------- /implementations/peakPicker/csim.tcl: -------------------------------------------------------------------------------- 1 | open_project proj_peakPicker 2 | set_top peakPicker 3 | add_files peakPicker.cpp 4 | add_files -tb peakPicker_tb.cpp 5 | add_files -tb ../../data/locations_3_ref.txt 6 | add_files -tb ../../data/pssCorrMagSq_3_in.txt 7 | add_files -tb ../../data/threshold_in.txt 8 | open_solution solution1 9 | set_part {xc7k410t-ffg900-2} 10 | csim_design 11 | exit 12 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Required packages for LLM-Aided FPGA Design Flow 2 | 3 | # HTTP requests 4 | requests>=2.31.0 5 | 6 | # Environment variables 7 | python-dotenv>=1.0.0 8 | 9 | # LLM API clients 10 | openai>=1.3.0 11 | google-generativeai>=0.3.0 12 | anthropic>=0.5.0 13 | 14 | # Data handling 15 | numpy>=1.24.0 16 | pandas>=2.0.0 17 | 18 | # Visualization (optional) 19 | matplotlib>=3.7.0 20 | 21 | # Utility 22 | tqdm>=4.65.0 23 | -------------------------------------------------------------------------------- /example/csynth.tcl: -------------------------------------------------------------------------------- 1 | open_project proj_peakPicker 2 | set_top peakPicker 3 | add_files peakPicker.cpp 4 | add_files -tb peakPicker_tb.cpp 5 | add_files -tb ./locations_3_ref.txt 6 | add_files -tb ./peakLocs_out.txt 7 | add_files -tb ./pssCorrMagSq_3_in.txt 8 | add_files -tb ./threshold_in.txt 9 | open_solution solution1 10 | set_part {xc7k410t-ffg900-2} 11 | create_clock -period 3.90 -name default 12 | set_clock_uncertainty 12.5% 13 | csynth_design 14 | exit 15 | -------------------------------------------------------------------------------- /implementations/peakPicker/cosim.tcl: -------------------------------------------------------------------------------- 1 | open_project proj_peakPicker 2 | set_top peakPicker 3 | add_files peakPicker.cpp 4 | add_files -tb peakPicker_tb.cpp 5 | add_files -tb ../../data/locations_3_ref.txt 6 | add_files -tb ../../data/pssCorrMagSq_3_in.txt 7 | add_files -tb ../../data/threshold_in.txt 8 | open_solution solution1 9 | set_part {xc7k410t-ffg900-2} 10 | create_clock -period 3.90 -name default 11 | set_clock_uncertainty 12.5% 12 | cosim_design 13 | exit 14 | -------------------------------------------------------------------------------- /implementations/peakPicker/csynth.tcl: -------------------------------------------------------------------------------- 1 | open_project proj_peakPicker 2 | set_top peakPicker 3 | add_files peakPicker.cpp 4 | add_files -tb peakPicker_tb.cpp 5 | add_files -tb ../../data/locations_3_ref.txt 6 | add_files -tb ../../data/pssCorrMagSq_3_in.txt 7 | add_files -tb ../../data/threshold_in.txt 8 | open_solution solution1 9 | set_part {xc7k410t-ffg900-2} 10 | create_clock -period 3.90 -name default 11 | set_clock_uncertainty 12.5% 12 | csynth_design 13 | exit 14 | -------------------------------------------------------------------------------- /implementations/peakPicker/impl.tcl: -------------------------------------------------------------------------------- 1 | open_project proj_peakPicker 2 | set_top peakPicker 3 | add_files peakPicker.cpp 4 | add_files -tb peakPicker_tb.cpp 5 | add_files -tb ../../data/locations_3_ref.txt 6 | add_files -tb ../../data/pssCorrMagSq_3_in.txt 7 | add_files -tb ../../data/threshold_in.txt 8 | open_solution solution1 9 | set_part {xc7k410t-ffg900-2} 10 | create_clock -period 3.90 -name default 11 | set_clock_uncertainty 12.5% 12 | export_design -flow impl 13 | exit 14 | -------------------------------------------------------------------------------- /implementations/peakPicker/export.tcl: -------------------------------------------------------------------------------- 1 | open_project proj_peakPicker 2 | set_top peakPicker 3 | add_files peakPicker.cpp 4 | add_files -tb peakPicker_tb.cpp 5 | add_files -tb ../../data/locations_3_ref.txt 6 | add_files -tb ../../data/pssCorrMagSq_3_in.txt 7 | add_files -tb ../../data/threshold_in.txt 8 | open_solution solution1 9 | set_part {xc7k410t-ffg900-2} 10 | create_clock -period 3.90 -name default 11 | set_clock_uncertainty 12.5% 12 | export_design -format ip_catalog 13 | exit 14 | -------------------------------------------------------------------------------- /prompt_feedback.json: -------------------------------------------------------------------------------- 1 | {"prompt": "hls_generation", "timestamp": "2025-04-06T11:38:21.561719", "model": "gemini-2.5-pro-exp-03-25", "component": "peakPicker", "status": "success", "files_generated": 3, "file_types": ["peakPicker_tb.cpp", "peakPicker.hpp", "peakPicker.cpp"]} 2 | {"prompt": "hls_generation", "timestamp": "2025-04-06T15:00:21.333684", "model": "gemini-2.5-pro-exp-03-25", "component": "peakPicker", "status": "success", "files_generated": 3, "file_types": ["peakPicker.hpp", "peakPicker.cpp", "peakPicker_tb.cpp"]} 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .venv/ 2 | .vscode/ 3 | implementations/peakPicker/proj_peakPicker/ 4 | # implementations/peakPicker/*.md 5 | # implementations/peakPicker/*.py 6 | implementations/peakPicker/*.log 7 | implementations/peakPicker/*.bak 8 | # prompts/paper_generation.md 9 | # prompts/readme_generation.md 10 | # prompts/hls_generation.md 11 | # prompts/backups/ 12 | scripts/__pycache__/ 13 | # scripts/agent_cli.py 14 | # scripts/agent_framework.py 15 | # scripts/generate_documentation.py 16 | record/ 17 | example/proj_peakPicker 18 | # workflows/ 19 | /*_log.txt 20 | -------------------------------------------------------------------------------- /prompts/hls_conversion.md: -------------------------------------------------------------------------------- 1 | # Copilot Instructions for Peak Picker Implementation 2 | 3 | ## Project Context 4 | This project implements a critical component of a 5G NR SSB detection application. 5 | The peak picker algorithm identifies SSB signals by locating peaks where the 6 | magnitude squared of the PSS correlation (`xcorr`) exceeds a predefined threshold. 7 | 8 | ## Task Description 9 | Your task is to translate the MATLAB peak picker algorithm into efficient HLS C++ 10 | code while preserving exact functionality. The implementation should be optimized 11 | for FPGA deployment using Xilinx HLS directives. 12 | 13 | [Additional sections...] -------------------------------------------------------------------------------- /implementations/peakPicker/performance_metrics.md: -------------------------------------------------------------------------------- 1 | # Performance Metrics Report 2 | 3 | ## Resource Utilization 4 | 5 | | Implementation | LUT | FF | DSP | BRAM | URAM | SRL | 6 | |---------------|-----|----|----|------|---------|-----| 7 | | solution1 | 324 | 528 | 0 | 0 | 0 | 17 | 8 | 9 | ## Timing 10 | 11 | | Implementation | Target (ns) | Target (MHz) | Post-Synthesis (ns) | Post-Synthesis (MHz) | Post-Route (ns) | Post-Route (MHz) | 12 | |---------------|------------|-------------|-------------------|---------------------|----------------|----------------| 13 | | solution1 | 3.90 | 256.41 | 3.46 | 288.77 | 3.01 | 331.79 | 14 | 15 | ## Latency 16 | 17 | | Implementation | Min (cycles) | Max (cycles) | Average (cycles) | Throughput (samples/cycle) | 18 | |---------------|-------------|-------------|-----------------|-----------------------------| 19 | | solution1 | 6003 | 6003 | - | - | 20 | 21 | -------------------------------------------------------------------------------- /algorithms/peakPicker_tb.m: -------------------------------------------------------------------------------- 1 | % This is the testbench for the peakPicker function. 2 | % It reads the input signal from a file, calls the peakPicker function, 3 | % and writes the output to a file. 4 | % 5 | % Author: Jie Lei 6 | % Date: 03/30/2025 7 | % 8 | % Read the input cross correlation from a file. 9 | xcorr = readmatrix('pssCorrMagSq_3_in.txt','Delimiter', 'tab'); 10 | % Read the threshold from a file. 11 | threshold = readmatrix('threshold_in.txt','Delimiter', 'tab'); 12 | 13 | % Call the peakPicker function. 14 | [peakLocs] = peakPicker(xcorr, threshold); 15 | % Write the output to a file. 16 | writematrix(peakLocs, 'peakLocs_out.txt','Delimiter', 'tab'); 17 | % Read the reference output from a file. 18 | refLocs = readmatrix('locations_3_ref.txt','Delimiter', 'tab'); 19 | % Compare the output with the reference output. 20 | if isequal(peakLocs, refLocs) 21 | disp('Test passed: The output matches the reference output.'); 22 | else 23 | disp('Test failed: The output does not match the reference output.'); 24 | end -------------------------------------------------------------------------------- /example/peakPicker_tb.m: -------------------------------------------------------------------------------- 1 | % This is the testbench for the peakPicker function. 2 | % It reads the input signal from a file, calls the peakPicker function, 3 | % and writes the output to a file. 4 | % 5 | % Author: Jie Lei 6 | % Date: 03/30/2025 7 | % 8 | % Read the input cross correlation from a file. 9 | xcorr = readmatrix('pssCorrMagSq_3_in.txt','Delimiter', 'tab'); 10 | % Read the threshold from a file. 11 | threshold = readmatrix('threshold_in.txt','Delimiter', 'tab'); 12 | 13 | % Call the peakPicker function. 14 | [peakLocs] = peakPicker(xcorr, threshold); 15 | % Write the output to a file. 16 | writematrix(peakLocs, 'peakLocs_out.txt','Delimiter', 'tab'); 17 | % Read the reference output from a file. 18 | refLocs = readmatrix('locations_3_ref.txt','Delimiter', 'tab'); 19 | % Compare the output with the reference output. 20 | if isequal(peakLocs, refLocs) 21 | disp('Test passed: The output matches the reference output.'); 22 | else 23 | disp('Test failed: The output does not match the reference output.'); 24 | end -------------------------------------------------------------------------------- /implementations/peakPicker/debug_reports/peakPicker_debug_data_20250406_150212.json: -------------------------------------------------------------------------------- 1 | { 2 | "timestamp": "2025-04-06T15:02:12.379457", 3 | "files": [ 4 | "/home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/peakPicker.hpp", 5 | "/home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/peakPicker.cpp", 6 | "/home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/peakPicker_tb.cpp" 7 | ], 8 | "error_analysis": { 9 | "primary_category": "unknown", 10 | "all_categories": [], 11 | "details": {} 12 | }, 13 | "bug_summary": "the error log.", 14 | "fix_summary": "the fix.\n * **If `csim_design` now passes (returns 0):** The original issue might have been misleadingly reported, or perhaps the indexing fix indirectly resolved the crash condition (unlikely but possible). Check if the test now reports PASSED or FAILED based on the corrected comparison logic.\n * **If `csim_design` still fails with `[SIM 100]`:** The crash is due to a different reason. Examine the `csim.log` file (usually found in the `/csim/build/` directory) for more detailed error messages (e.g., segmentation fault location, specific assertion failures). Further debugging would involve simplifying the input data, adding debug prints (`std::cout`) in the DUT (use sparingly and remove for synthesis), or checking the HLS tool environment.\n * Ensure the input files (`pssCorrMagSq_3_in.txt`, `threshold_in.txt`, `locations_3_ref.txt`) exist in the expected location relative to where the simulation is run (usually `//csim/build/`).", 15 | "model_used": "gemini-2.5-pro-exp-03-25" 16 | } -------------------------------------------------------------------------------- /example/peakPicker.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PEAKPICKER_HPP 2 | #define PEAKPICKER_HPP 3 | 4 | #ifdef __SYNTHESIS__ 5 | #include 6 | #include 7 | #include 8 | 9 | // Optimized data type definitions for lowest latency 10 | typedef ap_fixed<24, 6> data_t; // Reduced precision: 24-bit, 6 integer bits 11 | typedef ap_uint<16> index_t; // 16-bit unsigned integer for indices 12 | typedef ap_uint<8> count_t; // 8-bit counter for small counts 13 | #else 14 | // Testbench mode - use standard C++ types 15 | typedef double data_t; // Use double for testbench compatibility 16 | typedef unsigned short index_t; // 16-bit unsigned integer 17 | typedef unsigned char count_t; // 8-bit counter 18 | #endif 19 | 20 | // Algorithm parameters 21 | constexpr int WINDOW_LENGTH = 11; 22 | constexpr int MIDDLE_LOCATION = WINDOW_LENGTH / 2; // 5 23 | constexpr int MAX_INPUT_SIZE = 6001; 24 | constexpr int MAX_PEAKS = 100; // Conservative estimate for maximum peaks 25 | 26 | #ifdef __SYNTHESIS__ 27 | // Optimized streaming interface function declaration 28 | void peakPicker( 29 | hls::stream& xcorr_stream, 30 | hls::stream& threshold_stream, 31 | index_t input_length, 32 | hls::stream& locations_stream, 33 | index_t* num_peaks 34 | ); 35 | #endif 36 | 37 | // Array-based interface for testbench compatibility and synthesis 38 | void peakPicker_wrapper( 39 | data_t xcorr[MAX_INPUT_SIZE], 40 | data_t threshold[MAX_INPUT_SIZE], 41 | index_t input_length, 42 | index_t locations[MAX_PEAKS], 43 | index_t* num_peaks 44 | ); 45 | 46 | // Main function declaration 47 | void peakPicker( 48 | data_t xcorr[MAX_INPUT_SIZE], 49 | data_t threshold[MAX_INPUT_SIZE], 50 | index_t input_length, 51 | index_t locations[MAX_PEAKS], 52 | index_t* num_peaks 53 | ); 54 | 55 | #endif // PEAKPICKER_HPP -------------------------------------------------------------------------------- /implementations/peakPicker/peakPicker.hpp: -------------------------------------------------------------------------------- 1 | /* AUTO-EDITED BY DEBUG ASSISTANT */ 2 | #ifndef PEAK_PICKER_HPP 3 | #define PEAK_PICKER_HPP 4 | 5 | #include 6 | #include 7 | #include // For integer types if needed for indices 8 | 9 | //-------------------------------------------------------------------------- 10 | // Constants and Parameters 11 | //-------------------------------------------------------------------------- 12 | 13 | // Fixed-point type configuration (Adjust W and I based on signal analysis) 14 | constexpr int DATA_W = 32; // Total width 15 | constexpr int DATA_I = 16; // Integer width (including sign bit) 16 | 17 | // Sliding window configuration (Matches MATLAB reference) 18 | constexpr int WINDOW_LENGTH = 11; 19 | constexpr int MIDDLE_LOCATION = WINDOW_LENGTH / 2; // Index 5 (0-based) 20 | 21 | // Index type configuration (Ensure it can hold the maximum sample index) 22 | constexpr int INDEX_W = 32; // Width for location indices 23 | 24 | //-------------------------------------------------------------------------- 25 | // Type Definitions 26 | //-------------------------------------------------------------------------- 27 | 28 | // Fixed-point type for input data (xcorr and threshold) 29 | typedef ap_fixed Data_t; 30 | 31 | // Type for output location indices 32 | // Using ap_uint for non-negative indices. Use ap_int if indices can be negative. 33 | // Or simply use 'int' if standard integer sizes are sufficient and synthesizable. 34 | typedef ap_uint Index_t; 35 | // typedef int Index_t; // Alternative if standard int is sufficient 36 | 37 | //-------------------------------------------------------------------------- 38 | // Function Declaration 39 | //-------------------------------------------------------------------------- 40 | 41 | /** 42 | * @brief Finds peaks in a cross-correlation signal using a sliding window. 43 | * 44 | * @param xcorrStream Input stream of cross-correlation magnitude squared values. 45 | * @param thresholdStream Input stream of threshold values (corresponding to xcorr samples). 46 | * @param locationStream Output stream for detected peak location indices (0-based). 47 | * @param numSamples Total number of samples to process from the input streams. 48 | */ 49 | void peakPicker( 50 | hls::stream& xcorrStream, 51 | hls::stream& thresholdStream, 52 | hls::stream& locationStream, 53 | int numSamples 54 | ); 55 | 56 | #endif // PEAK_PICKER_HPP -------------------------------------------------------------------------------- /prompts/hls_debugging.md: -------------------------------------------------------------------------------- 1 | # HLS Code Debugging Assistant 2 | 3 | ## Task Description 4 | You are tasked with analyzing HLS C++ code that has encountered errors during compilation, simulation, or synthesis. You must identify the root causes of the errors and provide specific solutions. 5 | 6 | ## Source Files 7 | The following HLS C++ source files have been provided: 8 | 9 | {{SOURCE_FILES}} 10 | 11 | ## Error Log 12 | The following errors were encountered during the HLS process: 13 | 14 | {{ERROR_LOG}} 15 | 16 | ## Debugging Process 17 | 18 | Please follow this structured approach to debug the code: 19 | 20 | 1. **Error Analysis** 21 | - Categorize errors (compilation, simulation, synthesis, etc.) 22 | - Identify error patterns and relationships between multiple errors 23 | - Determine if errors are syntax-related, interface-related, or algorithm-related 24 | 25 | 2. **Root Cause Identification** 26 | - Locate the specific code causing each error 27 | - Analyze context surrounding the problematic code 28 | - Identify patterns of misuse of HLS constructs or C++ language features 29 | - Check for common HLS pitfalls: 30 | - Unsupported C++ features in HLS 31 | - Memory access pattern issues 32 | - Data type incompatibilities 33 | - Interface specification problems 34 | - Pragma-related issues 35 | 36 | 3. **Solution Development** 37 | - Propose specific fixes for each identified issue 38 | - Provide explanations for why the fixes will resolve the errors 39 | - Include code snippets showing the corrections 40 | - Address any potential side effects of the proposed changes 41 | 42 | 4. **Verification Guidance** 43 | - Suggest verification steps to ensure the fixes are correct 44 | - Recommend additional tests if appropriate 45 | - Provide guidance on preventing similar issues in the future 46 | 47 | ## IMPORTANT: Response Format 48 | 1. First, provide your analysis of the issue 49 | 2. Then, clearly indicate the start of the corrected code with "### COMPLETE CORRECTED SOURCE CODE:" 50 | 3. Provide the ENTIRE corrected source code file in a single code block, not just the changes 51 | 4. If you have multiple files, provide each file in a separate code block 52 | 5. Use the following format for code blocks: 53 | - For function code file 54 | **File: `{component}.cpp`** 55 | 56 | ```cpp 57 | // Your complete corrected code here 58 | ``` 59 | - For header file 60 | **File: `{component}.hpp`** 61 | 62 | ```cpp 63 | // Your complete corrected code here 64 | ``` 65 | - For test bench file 66 | **File: `{component}_tb.cpp`** 67 | 68 | ```cpp 69 | // Your complete corrected code here 70 | ``` 71 | ## BEST PRACTICES 72 | 73 | - Successfully resolved debugging issues on 2025-04-06 74 | 75 | - Successfully resolved debugging issues on 2025-04-06 76 | 77 | - Successfully resolved debugging issues on 2025-04-06 78 | 79 | - Successfully resolved debugging issues on 2025-04-06 80 | -------------------------------------------------------------------------------- /prompts/hls_generation.md: -------------------------------------------------------------------------------- 1 | # Copilot Instructions for {component} Implementation 2 | 3 | ## Project Context 4 | 5 | This project implements a component for an FPGA-based signal processing application. The {component} algorithm needs to be translated from a reference implementation to efficient HLS C++ code for FPGA deployment. 6 | 7 | ## Task Description 8 | 9 | Your task is to translate the reference {component} algorithm into efficient HLS C++ code while preserving exact functionality. The implementation should be optimized for FPGA deployment using Xilinx HLS directives. 10 | 11 | **Required Files:** 12 | 13 | - `{component}.hpp`: Header file with type definitions, function declarations, and parameters 14 | - `{component}.cpp`: Implementation file with the core algorithm 15 | - `{component}_tb.cpp`: C++ testbench that validates the implementation against reference data 16 | 17 | ## Input/Output Specifications 18 | 19 | - **Inputs:** 20 | - [To be specified based on {component} requirements] 21 | - **Outputs:** 22 | - [To be specified based on {component} requirements] 23 | 24 | ## Implementation Requirements 25 | 26 | ### Functional Requirements 27 | 28 | - Implement the `{component}()` function in HLS C++ with exactly the same behavior as the reference 29 | - Follow bit-accurate implementation of the reference algorithm (results must match reference within specified error margins) 30 | - Document code thoroughly with comments explaining the algorithm and optimization decisions 31 | 32 | ### Interface and Data Type Requirements 33 | 34 | - Use `hls::stream` interfaces with appropriate buffer depths for streaming data 35 | - Implement fixed-point arithmetic with `ap_fixed` (specify exact bit widths based on precision requirements) 36 | - Use `hls::complex>` for any complex number operations 37 | - Define all constant parameters in `{component}.hpp` using `#define` or `constexpr` 38 | - Create descriptive type aliases with `typedef` or `using` statements 39 | 40 | ### File I/O and Validation only in testbench file `{component}_tb.cpp` 41 | 42 | - Read input data from `{component}_in.txt` (one value per line) 43 | - Read threshold values from `threshold_in.txt` (one value per line) 44 | - Read reference output data from `{component}_ref.txt` (one value per line) 45 | - Implement robust error checking for file operations with clear error messages 46 | - Calculate and report both absolute and relative errors between your implementation and reference values 47 | 48 | ### Performance Optimization 49 | 50 | - Apply `#pragma HLS PIPELINE II=1` to performance-critical loops 51 | - Use `#pragma HLS DATAFLOW` for task-level pipelining 52 | - Implement arrays exceeding 64 elements using dual-port block RAM 53 | - Apply memory partitioning to arrays requiring parallel access 54 | - Configure optimization directives based on throughput requirements 55 | - Balance resource usage and performance based on target FPGA constraints 56 | 57 | ### Coding Style 58 | 59 | - Define all constants, types, and function declarations in `{component}.hpp` 60 | - Implement core algorithm in `{component}.cpp` with consistent style 61 | - Follow naming convention: camelCase for variables, PascalCase for types 62 | - Use self-documenting identifier names that clearly reflect their purpose 63 | 64 | ## Deliverables 65 | 66 | - Fully commented HLS C++ implementation files 67 | - Comprehensive testbench demonstrating functional correctness 68 | - Description of optimization approaches and their impact on performance 69 | -------------------------------------------------------------------------------- /algorithms/peakPicker.m: -------------------------------------------------------------------------------- 1 | % Modified from the original peakPicker.m file 2 | % to use a hardware-friendly implementation of peak finder 3 | % This function is used to find the locations of peaks in a cross-correlation 4 | % signal. It takes the cross-correlation signal, a threshold, and a window 5 | % length as inputs, and returns the locations of the detected peaks. 6 | % The function uses a sliding window approach to check for local maxima 7 | % within the specified window length. The middle sample of the window is 8 | % compared to the other samples in the window, and if it is greater than 9 | % the threshold, it is considered a peak. The function also ensures that 10 | % the detected peaks are within the specified window length. 11 | % The function is designed to be hardware-friendly, meaning it is optimized 12 | % for implementation on hardware platforms such as FPGAs or ASICs. It uses 13 | % a simple and efficient algorithm to find the peaks, avoiding complex 14 | % operations that may not be suitable for hardware implementation. 15 | % The function is written in MATLAB and can be used in various applications 16 | % such as signal processing, communications, and data analysis. 17 | % 18 | % The function takes the following inputs: 19 | % - xcorr: The cross-correlation signal, which is a matrix of size 20 | % (num_samples, num_sequences). Each column represents a different 21 | % sequence. 22 | % - threshold: The threshold value for peak detection, which is a vector 23 | % of size (num_samples, 1). The threshold is used to determine if a 24 | % sample is considered a peak. 25 | % - window_length: The length of the sliding window used for peak 26 | % detection. It is a scalar value that specifies the number of samples 27 | % to consider in the window. 28 | % 29 | % The function returns the following output: 30 | % - locations: A vector containing the indices of the detected peaks in 31 | % the cross-correlation signal. The indices are relative to the input 32 | % signal and indicate the locations of the detected peaks. 33 | % 34 | % Author: Jie Lei 35 | % Date: 03/31/2025 36 | % University of Technology Sydney 37 | 38 | function [locations]= peakPicker(xcorr,threshold) 39 | % Copyright 2021-2023 The MathWorks, Inc. 40 | 41 | locations=[]; 42 | window_length = 11; % Length of the sliding window 43 | middle_location=floor(window_length/2); 44 | xcorrBuffer = zeros(window_length, 1); % Preallocate buffer for current window 45 | thresholdBuffer = zeros(window_length, 1); % Preallocate buffer for threshold 46 | 47 | for index=1:length(xcorr)-window_length+1 48 | xcorrBuffer(2:end) = xcorrBuffer(1:end-1); % Shift buffer 49 | xcorrBuffer(1) = xcorr(index); % Add new sample to buffer 50 | thresholdBuffer(2:end) = thresholdBuffer(1:end-1); % Shift threshold buffer 51 | thresholdBuffer(1) = threshold(index); % Add new threshold to buffer 52 | if (index >= window_length) 53 | candidate_location = index - middle_location; 54 | % Hardware friendly implementation of peak finder 55 | MidSample = xcorrBuffer(middle_location+1,:); 56 | CompareOut = xcorrBuffer - MidSample; % this is a vector 57 | % if all values in the result are negative and the middle sample is 58 | % greater than a threshold, it is a local max 59 | if all(CompareOut <= 0) && (MidSample > thresholdBuffer(middle_location+1)) 60 | locations = [locations candidate_location]; %#ok 61 | end 62 | end 63 | end 64 | end 65 | -------------------------------------------------------------------------------- /example/peakPicker.m: -------------------------------------------------------------------------------- 1 | % Modified from the original peakPicker.m file 2 | % to use a hardware-friendly implementation of peak finder 3 | % This function is used to find the locations of peaks in a cross-correlation 4 | % signal. It takes the cross-correlation signal, a threshold, and a window 5 | % length as inputs, and returns the locations of the detected peaks. 6 | % The function uses a sliding window approach to check for local maxima 7 | % within the specified window length. The middle sample of the window is 8 | % compared to the other samples in the window, and if it is greater than 9 | % the threshold, it is considered a peak. The function also ensures that 10 | % the detected peaks are within the specified window length. 11 | % The function is designed to be hardware-friendly, meaning it is optimized 12 | % for implementation on hardware platforms such as FPGAs or ASICs. It uses 13 | % a simple and efficient algorithm to find the peaks, avoiding complex 14 | % operations that may not be suitable for hardware implementation. 15 | % The function is written in MATLAB and can be used in various applications 16 | % such as signal processing, communications, and data analysis. 17 | % 18 | % The function takes the following inputs: 19 | % - xcorr: The cross-correlation signal, which is a matrix of size 20 | % (num_samples, num_sequences). Each column represents a different 21 | % sequence. 22 | % - threshold: The threshold value for peak detection, which is a vector 23 | % of size (num_samples, 1). The threshold is used to determine if a 24 | % sample is considered a peak. 25 | % - window_length: The length of the sliding window used for peak 26 | % detection. It is a scalar value that specifies the number of samples 27 | % to consider in the window. 28 | % 29 | % The function returns the following output: 30 | % - locations: A vector containing the indices of the detected peaks in 31 | % the cross-correlation signal. The indices are relative to the input 32 | % signal and indicate the locations of the detected peaks. 33 | % 34 | % Author: Jie Lei 35 | % Date: 03/31/2025 36 | % University of Technology Sydney 37 | 38 | function [locations]= peakPicker(xcorr,threshold) 39 | % Copyright 2021-2023 The MathWorks, Inc. 40 | 41 | locations=[]; 42 | window_length = 11; % Length of the sliding window 43 | middle_location=floor(window_length/2); 44 | xcorrBuffer = zeros(window_length, 1); % Preallocate buffer for current window 45 | thresholdBuffer = zeros(window_length, 1); % Preallocate buffer for threshold 46 | 47 | for index=1:length(xcorr)-window_length+1 48 | xcorrBuffer(2:end) = xcorrBuffer(1:end-1); % Shift buffer 49 | xcorrBuffer(1) = xcorr(index); % Add new sample to buffer 50 | thresholdBuffer(2:end) = thresholdBuffer(1:end-1); % Shift threshold buffer 51 | thresholdBuffer(1) = threshold(index); % Add new threshold to buffer 52 | if (index >= window_length) 53 | candidate_location = index - middle_location; 54 | % Hardware friendly implementation of peak finder 55 | MidSample = xcorrBuffer(middle_location+1,:); 56 | CompareOut = xcorrBuffer - MidSample; % this is a vector 57 | % if all values in the result are negative and the middle sample is 58 | % greater than a threshold, it is a local max 59 | if all(CompareOut <= 0) && (MidSample > thresholdBuffer(middle_location+1)) 60 | locations = [locations candidate_location]; %#ok 61 | end 62 | end 63 | end 64 | end 65 | -------------------------------------------------------------------------------- /example/peakPicker.cpp.backup: -------------------------------------------------------------------------------- 1 | #include "peakPicker.hpp" 2 | 3 | #ifdef __SYNTHESIS__ 4 | #include 5 | #endif 6 | 7 | void peakPicker( 8 | data_t xcorr[MAX_INPUT_SIZE], 9 | data_t threshold[MAX_INPUT_SIZE], 10 | index_t input_length, 11 | index_t locations[MAX_PEAKS], 12 | index_t* num_peaks 13 | ) { 14 | #ifdef __SYNTHESIS__ 15 | // Set interface pragmas for optimal hardware interface 16 | #pragma HLS INTERFACE m_axi port=xcorr offset=slave bundle=gmem0 17 | #pragma HLS INTERFACE m_axi port=threshold offset=slave bundle=gmem1 18 | #pragma HLS INTERFACE m_axi port=locations offset=slave bundle=gmem2 19 | #pragma HLS INTERFACE m_axi port=num_peaks offset=slave bundle=gmem3 20 | #pragma HLS INTERFACE s_axilite port=input_length bundle=control 21 | #pragma HLS INTERFACE s_axilite port=return bundle=control 22 | #endif 23 | 24 | // Local buffers for sliding window implementation 25 | data_t xcorrBuffer[WINDOW_LENGTH]; 26 | data_t thresholdBuffer[WINDOW_LENGTH]; 27 | 28 | // Initialize buffers to zero 29 | init_buffers: for (int i = 0; i < WINDOW_LENGTH; i++) { 30 | #pragma HLS UNROLL 31 | xcorrBuffer[i] = 0; 32 | thresholdBuffer[i] = 0; 33 | } 34 | 35 | index_t peak_count = 0; 36 | 37 | // Main processing loop - sliding window peak detection 38 | main_loop: for (index_t index = 0; index < input_length - WINDOW_LENGTH + 1; index++) { 39 | #ifdef __SYNTHESIS__ 40 | #pragma HLS PIPELINE II=1 41 | #pragma HLS LOOP_TRIPCOUNT min=5991 max=5991 avg=5991 42 | #endif 43 | 44 | // Shift buffers and add new samples (hardware-friendly shift register) 45 | shift_xcorr: for (int i = WINDOW_LENGTH - 1; i > 0; i--) { 46 | #ifdef __SYNTHESIS__ 47 | #pragma HLS UNROLL 48 | #endif 49 | xcorrBuffer[i] = xcorrBuffer[i-1]; 50 | } 51 | xcorrBuffer[0] = xcorr[index]; 52 | 53 | shift_threshold: for (int i = WINDOW_LENGTH - 1; i > 0; i--) { 54 | #ifdef __SYNTHESIS__ 55 | #pragma HLS UNROLL 56 | #endif 57 | thresholdBuffer[i] = thresholdBuffer[i-1]; 58 | } 59 | thresholdBuffer[0] = threshold[index]; 60 | 61 | // Check for peak only after buffer is full 62 | if (index >= WINDOW_LENGTH - 1) { 63 | index_t candidate_location = index - MIDDLE_LOCATION; 64 | 65 | // Get middle sample for comparison 66 | data_t midSample = xcorrBuffer[MIDDLE_LOCATION]; 67 | data_t midThreshold = thresholdBuffer[MIDDLE_LOCATION]; 68 | 69 | // Check if middle sample is greater than threshold 70 | bool above_threshold = (midSample > midThreshold); 71 | 72 | // Hardware-friendly peak detection: check if middle sample is local maximum 73 | bool is_peak = true; 74 | peak_check: for (int i = 0; i < WINDOW_LENGTH; i++) { 75 | #ifdef __SYNTHESIS__ 76 | #pragma HLS UNROLL 77 | #endif 78 | if (i != MIDDLE_LOCATION) { 79 | if (xcorrBuffer[i] > midSample) { 80 | is_peak = false; 81 | } 82 | } 83 | } 84 | 85 | // Store peak location if conditions are met 86 | if (is_peak && above_threshold && peak_count < MAX_PEAKS) { 87 | locations[peak_count] = candidate_location + 1; // MATLAB 1-indexed 88 | peak_count++; 89 | } 90 | } 91 | } 92 | 93 | *num_peaks = peak_count; 94 | } -------------------------------------------------------------------------------- /prompts/performance_optimization.md: -------------------------------------------------------------------------------- 1 | # HLS Performance Optimization Guidelines 2 | 3 | ## Task Description 4 | You are tasked with optimizing an existing HLS C++ implementation to improve performance, reduce resource utilization, or achieve better timing. Your goal is to maintain functional correctness while enhancing design metrics. 5 | 6 | ## Source Files 7 | The following HLS C++ source files are being provided: 8 | 9 | {{SOURCE_FILES}} 10 | 11 | ## Performance Metrics 12 | Current performance metrics of the design: 13 | 14 | {{PERFORMANCE_METRICS}} 15 | 16 | ## Optimization Goals 17 | - Primary goal: {{PRIMARY_GOAL}} (e.g., "Reduce latency by at least 30%") 18 | - Secondary goal: {{SECONDARY_GOAL}} (e.g., "Maintain or reduce resource utilization") 19 | 20 | ## Optimization Strategy 21 | 22 | Please follow this structured approach: 23 | 24 | 1. **Design Analysis** 25 | - Analyze the algorithm structure and computational patterns 26 | - Identify performance bottlenecks in the current implementation 27 | - Map data dependencies and memory access patterns 28 | - Recognize rate-limiting operations or loops 29 | 30 | 2. **Optimization Techniques** 31 | 32 | Consider the following optimization categories: 33 | 34 | **Loop Optimizations:** 35 | - Pipeline loops to improve throughput (PIPELINE pragma) 36 | - Unroll loops to exploit parallelism (UNROLL pragma) 37 | - Merge loops to reduce overhead 38 | - Partition loops to enable better scheduling 39 | 40 | **Memory Optimizations:** 41 | - Array partitioning (ARRAY_PARTITION pragma) 42 | - Memory reshaping for better access patterns 43 | - Double buffering for overlapped computation 44 | - Streaming interfaces for sequential data (hls::stream) 45 | 46 | **Data Type Optimizations:** 47 | - Optimize bit widths using ap_fixed/ap_int 48 | - Convert floating-point to fixed-point where appropriate 49 | - Simplify complex operations with lookup tables or approximations 50 | 51 | **Function-Level Optimizations:** 52 | - Inline small functions to reduce function call overhead 53 | - Dataflow optimization for task-level pipelining 54 | - Function parallelism with multiple instances 55 | 56 | **Interface Optimizations:** 57 | - Optimize interface protocols (AXI4, AXI-Lite, AXI-Stream) 58 | - Burst transfers for efficient data movement 59 | - Register slicing for timing improvement 60 | 61 | 3. **Implementation Plan** 62 | - Prioritize optimizations based on impact vs. effort 63 | - Plan incremental changes that can be verified individually 64 | - Consider trade-offs between different metrics (latency vs. area) 65 | 66 | ## Response Format 67 | 68 | Please structure your response as follows: 69 | 70 | ### 1. Design Analysis 71 | A summary of your analysis of the current implementation, identifying bottlenecks and opportunities. 72 | 73 | ### 2. Recommended Optimizations 74 | For each file requiring changes: 75 | - The file name 76 | - Description of proposed optimizations 77 | - Code snippets showing the modifications with added pragmas or code changes 78 | - Expected impact of each optimization 79 | 80 | ### 3. Implementation Priority 81 | A prioritized list of optimizations, explaining which should be implemented first. 82 | 83 | ### 4. Expected Outcomes 84 | Predictions about the performance improvements that could be achieved. 85 | 86 | ### 5. Verification Plan 87 | Suggestions for verifying that the optimizations maintain functional correctness. 88 | 89 | ## Additional Guidelines 90 | - Focus on HLS-specific optimizations, not general C++ performance improvements 91 | - Explain the reasoning behind each optimization 92 | - Consider Xilinx/Intel FPGA architecture specifics when relevant 93 | - Indicate any potential risks or trade-offs for each optimization 94 | - When multiple approaches could work, explain the pros and cons of each 95 | -------------------------------------------------------------------------------- /workflows/default_workflow.json: -------------------------------------------------------------------------------- 1 | { 2 | "generate_code": { 3 | "agent": "code_generator", 4 | "context": { 5 | "args": { 6 | "matlab_file": [""], 7 | "prompt": "", 8 | "prompt_name": "hls_generation", 9 | "output_dir": "", 10 | "model": "gemini-2.5-pro-exp-03-25", 11 | "timeout": 600 12 | } 13 | }, 14 | "update_context": { 15 | "component_dir": "output.stdout" 16 | }, 17 | "next": "generate_makefile", 18 | "on_error": "stop" 19 | }, 20 | "generate_makefile": { 21 | "agent": "makefile_generator", 22 | "context": { 23 | "work_dir": "", 24 | "component": "", 25 | "template_makefile": "/scripts/Makefile" 26 | }, 27 | "next": "build_csim", 28 | "on_error": "stop" 29 | }, 30 | "build_csim": { 31 | "agent": "hls_builder", 32 | "context": { 33 | "work_dir": "", 34 | "target": "csim" 35 | }, 36 | "next": "build_csynth", 37 | "on_error": "debug_errors" 38 | }, 39 | "debug_errors": { 40 | "agent": "debug_assistant", 41 | "context": { 42 | "args": { 43 | "error_log": "/proj_/solution1/csim/report/_csim.log", 44 | "source_file": [ 45 | "/.hpp", 46 | "/.cpp", 47 | "/_tb.cpp" 48 | ], 49 | "model": "gemini-2.5-pro-exp-03-25", 50 | "timeout": 300 51 | } 52 | }, 53 | "next": "build_csim", 54 | "on_error": "build_csynth" 55 | }, 56 | "build_csynth": { 57 | "agent": "hls_builder", 58 | "context": { 59 | "work_dir": "", 60 | "target": "csynth" 61 | }, 62 | "next": { 63 | "context.get('optimize', False) == True": "optimize_code", 64 | "default": "build_cosim" 65 | }, 66 | "on_error": "stop" 67 | }, 68 | "optimize_code": { 69 | "agent": "performance_optimizer", 70 | "context": { 71 | "args": { 72 | "source_dir": "", 73 | "prompt_name": "performance_optimization", 74 | "model": "gemini-2.5-pro-exp-03-25", 75 | "primary_goal": "Reduce latency by at least 20%", 76 | "secondary_goal": "Maintain resource utilization" 77 | } 78 | }, 79 | "next": "build_csynth_optimized", 80 | "on_error": "build_cosim" 81 | }, 82 | "build_csynth_optimized": { 83 | "agent": "hls_builder", 84 | "context": { 85 | "work_dir": "", 86 | "target": "csynth" 87 | }, 88 | "next": "build_cosim", 89 | "on_error": "build_cosim" 90 | }, 91 | "build_cosim": { 92 | "agent": "hls_builder", 93 | "context": { 94 | "work_dir": "", 95 | "target": "cosim" 96 | }, 97 | "next": "export_ip", 98 | "on_error": "stop" 99 | }, 100 | "export_ip": { 101 | "agent": "hls_builder", 102 | "context": { 103 | "work_dir": "", 104 | "target": "export_ip" 105 | }, 106 | "next": "build_impl", 107 | "on_error": "build_impl" 108 | }, 109 | "build_impl": { 110 | "agent": "hls_builder", 111 | "context": { 112 | "work_dir": "", 113 | "target": "impl" 114 | }, 115 | "next": "generate_documentation", 116 | "on_error": "generate_documentation" 117 | }, 118 | "generate_documentation": { 119 | "agent": "documentation_generator", 120 | "context": { 121 | "component_dir": "", 122 | "component": "", 123 | "output_format": ["readme", "paper"], 124 | "model": "gemini-2.5-pro-exp-03-25" 125 | }, 126 | "on_error": "stop" 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /example/peakPicker.cpp: -------------------------------------------------------------------------------- 1 | #include "peakPicker.hpp" 2 | 3 | #ifdef __SYNTHESIS__ 4 | #include 5 | #endif 6 | 7 | // Ultra-optimized implementation targeting II=1 8 | void peakPicker_wrapper( 9 | data_t xcorr[MAX_INPUT_SIZE], 10 | data_t threshold[MAX_INPUT_SIZE], 11 | index_t input_length, 12 | index_t locations[MAX_PEAKS], 13 | index_t* num_peaks 14 | ) { 15 | #ifdef __SYNTHESIS__ 16 | // Optimized interface pragmas - consolidated AXI bundles 17 | #pragma HLS INTERFACE m_axi port=xcorr offset=slave bundle=gmem max_read_burst_length=256 18 | #pragma HLS INTERFACE m_axi port=threshold offset=slave bundle=gmem max_read_burst_length=256 19 | #pragma HLS INTERFACE m_axi port=locations offset=slave bundle=gmem_out max_write_burst_length=100 20 | #pragma HLS INTERFACE m_axi port=num_peaks offset=slave bundle=gmem_out 21 | #pragma HLS INTERFACE s_axilite port=input_length bundle=control 22 | #pragma HLS INTERFACE s_axilite port=return bundle=control 23 | #endif 24 | 25 | // Optimized circular buffer using explicit shift register pattern 26 | data_t xcorr_sr[WINDOW_LENGTH]; 27 | data_t threshold_sr[WINDOW_LENGTH]; 28 | 29 | #ifdef __SYNTHESIS__ 30 | // Complete array partitioning for parallel access 31 | #pragma HLS ARRAY_PARTITION variable=xcorr_sr complete dim=1 32 | #pragma HLS ARRAY_PARTITION variable=threshold_sr complete dim=1 33 | #endif 34 | 35 | // Initialize shift registers 36 | init_sr: for (int i = 0; i < WINDOW_LENGTH; i++) { 37 | #ifdef __SYNTHESIS__ 38 | #pragma HLS UNROLL 39 | #endif 40 | xcorr_sr[i] = 0; 41 | threshold_sr[i] = 0; 42 | } 43 | 44 | index_t peak_count = 0; 45 | 46 | // Ultra-optimized main loop - processes all samples including initial window fill 47 | ultra_main_loop: for (index_t idx = 0; idx < input_length; idx++) { 48 | #ifdef __SYNTHESIS__ 49 | #pragma HLS PIPELINE II=1 50 | #pragma HLS LOOP_TRIPCOUNT min=6001 max=6001 avg=6001 51 | // Force dependency analysis to avoid false dependencies 52 | #pragma HLS DEPENDENCE variable=xcorr_sr inter false 53 | #pragma HLS DEPENDENCE variable=threshold_sr inter false 54 | #endif 55 | 56 | // Read new samples 57 | data_t new_xcorr = xcorr[idx]; 58 | data_t new_threshold = threshold[idx]; 59 | 60 | // Explicit shift register implementation (completely unrolled) 61 | // This avoids memory dependency issues 62 | // Manual shift register unrolling for maximum performance 63 | xcorr_sr[10] = xcorr_sr[9]; 64 | xcorr_sr[9] = xcorr_sr[8]; 65 | xcorr_sr[8] = xcorr_sr[7]; 66 | xcorr_sr[7] = xcorr_sr[6]; 67 | xcorr_sr[6] = xcorr_sr[5]; 68 | xcorr_sr[5] = xcorr_sr[4]; 69 | xcorr_sr[4] = xcorr_sr[3]; 70 | xcorr_sr[3] = xcorr_sr[2]; 71 | xcorr_sr[2] = xcorr_sr[1]; 72 | xcorr_sr[1] = xcorr_sr[0]; 73 | xcorr_sr[0] = new_xcorr; 74 | 75 | threshold_sr[10] = threshold_sr[9]; 76 | threshold_sr[9] = threshold_sr[8]; 77 | threshold_sr[8] = threshold_sr[7]; 78 | threshold_sr[7] = threshold_sr[6]; 79 | threshold_sr[6] = threshold_sr[5]; 80 | threshold_sr[5] = threshold_sr[4]; 81 | threshold_sr[4] = threshold_sr[3]; 82 | threshold_sr[3] = threshold_sr[2]; 83 | threshold_sr[2] = threshold_sr[1]; 84 | threshold_sr[1] = threshold_sr[0]; 85 | threshold_sr[0] = new_threshold; 86 | 87 | // Peak detection (starts after window is filled) 88 | if (idx >= WINDOW_LENGTH - 1) { 89 | // Get middle sample (index 5 for window of 11) 90 | data_t mid_xcorr = xcorr_sr[MIDDLE_LOCATION]; 91 | data_t mid_threshold = threshold_sr[MIDDLE_LOCATION]; 92 | 93 | // Threshold check 94 | bool above_threshold = (mid_xcorr > mid_threshold); 95 | 96 | // Parallel peak detection - fully unrolled comparison 97 | bool is_peak = (xcorr_sr[0] <= mid_xcorr) && 98 | (xcorr_sr[1] <= mid_xcorr) && 99 | (xcorr_sr[2] <= mid_xcorr) && 100 | (xcorr_sr[3] <= mid_xcorr) && 101 | (xcorr_sr[4] <= mid_xcorr) && 102 | // Skip middle element (index 5) 103 | (xcorr_sr[6] <= mid_xcorr) && 104 | (xcorr_sr[7] <= mid_xcorr) && 105 | (xcorr_sr[8] <= mid_xcorr) && 106 | (xcorr_sr[9] <= mid_xcorr) && 107 | (xcorr_sr[10] <= mid_xcorr); 108 | 109 | // Compute peak location 110 | index_t peak_location = idx - MIDDLE_LOCATION + 1; // MATLAB 1-indexed 111 | 112 | // Conditional peak storage 113 | if (is_peak && above_threshold && peak_count < MAX_PEAKS) { 114 | locations[peak_count] = peak_location; 115 | peak_count++; 116 | } 117 | } 118 | } 119 | 120 | *num_peaks = peak_count; 121 | } 122 | 123 | // Alias for backward compatibility 124 | void peakPicker( 125 | data_t xcorr[MAX_INPUT_SIZE], 126 | data_t threshold[MAX_INPUT_SIZE], 127 | index_t input_length, 128 | index_t locations[MAX_PEAKS], 129 | index_t* num_peaks 130 | ) { 131 | peakPicker_wrapper(xcorr, threshold, input_length, locations, num_peaks); 132 | } -------------------------------------------------------------------------------- /scripts/validate_report_parsing.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import sys 5 | import argparse 6 | import json 7 | import logging 8 | from pathlib import Path 9 | 10 | # Set up logging 11 | logging.basicConfig( 12 | level=logging.INFO, 13 | format="%(asctime)s [%(levelname)s] %(message)s", 14 | handlers=[logging.StreamHandler()] 15 | ) 16 | logger = logging.getLogger("report_validator") 17 | 18 | # Add the scripts directory to the path so we can import the agent classes 19 | script_dir = Path(__file__).parent 20 | sys.path.insert(0, str(script_dir)) 21 | 22 | # Import agent framework components 23 | from agent_framework import DocumentationAgent 24 | 25 | def validate_report_parsing(component_dir): 26 | """Validate the parsing of HLS implementation and latency reports.""" 27 | if not os.path.exists(component_dir): 28 | logger.error(f"Component directory not found: {component_dir}") 29 | return False 30 | 31 | # Create a temporary documentation agent for testing 32 | doc_agent = DocumentationAgent("test_doc_agent", "Test documentation agent") 33 | 34 | # Test report parsing 35 | try: 36 | logger.info(f"Analyzing reports in {component_dir}") 37 | metrics = doc_agent.analyze_reports(component_dir) 38 | 39 | if "error" in metrics: 40 | logger.error(f"Error in report analysis: {metrics['error']}") 41 | return False 42 | 43 | # Check if any metrics were found 44 | resource_count = sum(len(impl) for impl in metrics.get("resources", {}).values()) 45 | timing_count = sum(len(impl) for impl in metrics.get("timing", {}).values()) 46 | latency_count = len(metrics.get("latency", {})) 47 | 48 | logger.info(f"Found resource metrics: {resource_count} entries") 49 | logger.info(f"Found timing metrics: {timing_count} entries") 50 | logger.info(f"Found latency metrics: {latency_count} entries") 51 | 52 | # Check for specific expected fields 53 | if "resources" in metrics and metrics["resources"]: 54 | first_impl = next(iter(metrics["resources"])) 55 | first_res = metrics["resources"][first_impl] 56 | logger.info(f"Resource metrics example ({first_impl}): {first_res}") 57 | 58 | if "timing" in metrics and metrics["timing"]: 59 | first_impl = next(iter(metrics["timing"])) 60 | first_timing = metrics["timing"][first_impl] 61 | logger.info(f"Timing metrics example ({first_impl}): {first_timing}") 62 | 63 | # Verify MHz conversion 64 | for key, value in first_timing.items(): 65 | if key.endswith("_MHz"): 66 | logger.info(f" Found MHz conversion: {key} = {value}") 67 | 68 | if "latency" in metrics and metrics["latency"]: 69 | first_impl = next(iter(metrics["latency"])) 70 | first_latency = metrics["latency"][first_impl] 71 | logger.info(f"Latency metrics example ({first_impl}): {first_latency}") 72 | 73 | # Check if markdown report was generated 74 | md_report_path = os.path.join(component_dir, "performance_metrics.md") 75 | if os.path.exists(md_report_path): 76 | logger.info(f"Markdown performance report generated at: {md_report_path}") 77 | with open(md_report_path, 'r') as f: 78 | report_content = f.read() 79 | logger.info(f"Report length: {len(report_content)} characters") 80 | else: 81 | logger.warning(f"No markdown report found at: {md_report_path}") 82 | 83 | # Verify data extraction for documentation 84 | test_prompt = doc_agent._create_documentation_prompt( 85 | {"workflow_summary": {}}, 86 | metrics, 87 | component_dir, 88 | {"component": os.path.basename(component_dir)}, 89 | ["readme"], 90 | None 91 | ) 92 | 93 | logger.info(f"Generated documentation prompt length: {len(test_prompt)} characters") 94 | 95 | # Extract performance metrics section from the prompt 96 | import re 97 | perf_section = re.search(r"## Performance Metrics\s*\n(.*?)(?:\n##|\Z)", test_prompt, re.DOTALL) 98 | if perf_section: 99 | logger.info("Performance metrics section found in documentation prompt") 100 | performance_text = perf_section.group(1) 101 | logger.info(f"Performance section length: {len(performance_text)} characters") 102 | else: 103 | logger.warning("Performance metrics section not found in documentation prompt") 104 | 105 | return True 106 | 107 | except Exception as e: 108 | logger.error(f"Error validating report parsing: {e}") 109 | import traceback 110 | logger.error(traceback.format_exc()) 111 | return False 112 | 113 | def main(): 114 | parser = argparse.ArgumentParser(description="Validate HLS report parsing and documentation generation") 115 | parser.add_argument("--component_dir", type=str, required=True, 116 | help="Path to component directory containing HLS reports") 117 | 118 | args = parser.parse_args() 119 | 120 | success = validate_report_parsing(args.component_dir) 121 | 122 | if success: 123 | logger.info("Validation completed successfully") 124 | sys.exit(0) 125 | else: 126 | logger.error("Validation failed") 127 | sys.exit(1) 128 | 129 | if __name__ == "__main__": 130 | main() 131 | -------------------------------------------------------------------------------- /implementations/peakPicker/peakPicker.cpp: -------------------------------------------------------------------------------- 1 | /* AUTO-EDITED BY DEBUG ASSISTANT */ 2 | #include "peakPicker.hpp" 3 | 4 | /** 5 | * @brief Core implementation of the peakPicker algorithm for HLS. 6 | * 7 | * Implements a sliding window peak detector. A sample is considered a peak if: 8 | * 1. It is the maximum value within a window of WINDOW_LENGTH samples centered around it. 9 | * 2. Its value exceeds the corresponding threshold value. 10 | * 11 | * This function processes streaming input data and outputs the 0-based indices of detected peaks. 12 | */ 13 | void peakPicker( 14 | hls::stream& xcorrStream, 15 | hls::stream& thresholdStream, 16 | hls::stream& locationStream, 17 | int numSamples 18 | ) { 19 | // Interface pragmas are usually placed here or in a separate directives file 20 | // #pragma HLS INTERFACE axis port=xcorrStream 21 | // #pragma HLS INTERFACE axis port=thresholdStream 22 | // #pragma HLS INTERFACE axis port=locationStream 23 | // #pragma HLS INTERFACE s_axilite port=numSamples bundle=control 24 | // #pragma HLS INTERFACE s_axilite port=return bundle=control 25 | 26 | // Buffers to hold the current window of data and thresholds 27 | // These implement the sliding window mechanism. 28 | Data_t xcorrBuffer[WINDOW_LENGTH]; 29 | Data_t thresholdBuffer[WINDOW_LENGTH]; 30 | 31 | // Partitioning the arrays allows parallel access to elements within the 32 | // pipelined loop, mapping them to registers for II=1. 33 | #pragma HLS ARRAY_PARTITION variable=xcorrBuffer complete dim=1 34 | #pragma HLS ARRAY_PARTITION variable=thresholdBuffer complete dim=1 35 | 36 | // Initialize buffers (optional, but good practice for simulation) 37 | // Can be skipped if the initial state doesn't affect the first valid output 38 | // Using an unrolled loop for initialization 39 | init_loop: 40 | for (int i = 0; i < WINDOW_LENGTH; ++i) { 41 | #pragma HLS UNROLL 42 | xcorrBuffer[i] = 0; 43 | thresholdBuffer[i] = 0; 44 | } 45 | 46 | // Main processing loop iterates through all input samples 47 | // Apply PIPELINE directive for high throughput (initiation interval II=1) 48 | main_loop: 49 | for (int i = 0; i < numSamples; ++i) { 50 | #pragma HLS PIPELINE II=1 51 | 52 | // 1. Shift Buffers: Make space for the new sample at index 0 53 | // Shift existing elements towards the end of the buffer 54 | shift_loop: 55 | for (int k = WINDOW_LENGTH - 1; k > 0; --k) { 56 | #pragma HLS UNROLL // Unroll this small loop for efficiency 57 | xcorrBuffer[k] = xcorrBuffer[k - 1]; 58 | thresholdBuffer[k] = thresholdBuffer[k - 1]; 59 | } 60 | 61 | // 2. Read New Samples: Read from input streams and place at the start (index 0) 62 | // Ensure streams are not empty before reading (HLS streams block if empty) 63 | Data_t newXcorrSample = xcorrStream.read(); 64 | Data_t newThresholdSample = thresholdStream.read(); 65 | xcorrBuffer[0] = newXcorrSample; 66 | thresholdBuffer[0] = newThresholdSample; 67 | 68 | // 3. Peak Detection Logic: Start after the window is filled 69 | // The first potential peak can be checked when i = WINDOW_LENGTH - 1 70 | // At this point, the sample corresponding to the middle of the *first full window* 71 | // is located at xcorrBuffer[MIDDLE_LOCATION]. This sample was originally read 72 | // at iteration i = (WINDOW_LENGTH - 1) - MIDDLE_LOCATION = i - MIDDLE_LOCATION. 73 | if (i >= WINDOW_LENGTH - 1) { 74 | // Get the sample and threshold at the center of the current window 75 | // This sample corresponds to the one read 'MIDDLE_LOCATION' iterations ago. 76 | Data_t middleSample = xcorrBuffer[MIDDLE_LOCATION]; 77 | Data_t middleThreshold = thresholdBuffer[MIDDLE_LOCATION]; 78 | 79 | // Check if the middle sample is the maximum in the window 80 | bool isPeak = true; // Assume it's a peak initially 81 | compare_loop: 82 | for (int k = 0; k < WINDOW_LENGTH; ++k) { 83 | #pragma HLS UNROLL // Unroll comparison loop for parallelism 84 | // Check if any other sample in the window is strictly greater 85 | // Note: MATLAB's `findpeaks` often uses >= (non-strictly greater), 86 | // but the reference comment suggests middleSample >= all others. 87 | // This implementation finds peaks strictly greater than neighbors. 88 | // If equality is allowed (plateaus), the condition might need adjustment. 89 | // The current logic: middleSample must be >= all other elements. 90 | if (k != MIDDLE_LOCATION && xcorrBuffer[k] > middleSample) { 91 | isPeak = false; 92 | // No need to 'break' here if the loop is fully unrolled, 93 | // as all comparisons happen in parallel. But break is harmless 94 | // in C simulation and might help logic synthesis slightly. 95 | break; 96 | } 97 | } 98 | 99 | // Check if the peak condition is met (local maximum AND above threshold) 100 | if (isPeak && (middleSample > middleThreshold)) { 101 | // Calculate the 0-based index of the peak in the original input stream 102 | // The sample currently at xcorrBuffer[MIDDLE_LOCATION] was read 103 | // MIDDLE_LOCATION iterations ago. Its original index was i - MIDDLE_LOCATION. 104 | Index_t peakLocation = i - MIDDLE_LOCATION; 105 | 106 | // Write the detected peak location (0-based index) to the output stream 107 | locationStream.write(peakLocation); 108 | } 109 | } 110 | } // end main_loop 111 | } -------------------------------------------------------------------------------- /example/peakPicker_optimized.cpp: -------------------------------------------------------------------------------- 1 | #include "peakPicker.hpp" 2 | 3 | #ifdef __SYNTHESIS__ 4 | #include 5 | #endif 6 | 7 | // Optimized streaming-based implementation for lowest latency 8 | void peakPicker( 9 | hls::stream& xcorr_stream, 10 | hls::stream& threshold_stream, 11 | index_t input_length, 12 | hls::stream& locations_stream, 13 | index_t* num_peaks 14 | ) { 15 | #ifdef __SYNTHESIS__ 16 | // Optimized interface pragmas for streaming 17 | #pragma HLS INTERFACE axis port=xcorr_stream 18 | #pragma HLS INTERFACE axis port=threshold_stream 19 | #pragma HLS INTERFACE axis port=locations_stream 20 | #pragma HLS INTERFACE s_axilite port=input_length bundle=control 21 | #pragma HLS INTERFACE m_axi port=num_peaks offset=slave bundle=gmem 22 | #pragma HLS INTERFACE s_axilite port=return bundle=control 23 | 24 | // Dataflow optimization for task-level pipelining 25 | #pragma HLS DATAFLOW 26 | #endif 27 | 28 | // Optimized circular buffer implementation using shift registers 29 | data_t xcorr_window[WINDOW_LENGTH]; 30 | data_t threshold_window[WINDOW_LENGTH]; 31 | 32 | #ifdef __SYNTHESIS__ 33 | #pragma HLS ARRAY_PARTITION variable=xcorr_window complete dim=1 34 | #pragma HLS ARRAY_PARTITION variable=threshold_window complete dim=1 35 | #endif 36 | 37 | // Initialize windows 38 | init_window: for (int i = 0; i < WINDOW_LENGTH; i++) { 39 | #ifdef __SYNTHESIS__ 40 | #pragma HLS UNROLL 41 | #endif 42 | xcorr_window[i] = 0; 43 | threshold_window[i] = 0; 44 | } 45 | 46 | index_t peak_count = 0; 47 | 48 | // Main optimized processing loop with streaming 49 | main_processing: for (index_t idx = 0; idx < input_length; idx++) { 50 | #ifdef __SYNTHESIS__ 51 | #pragma HLS PIPELINE II=1 52 | #pragma HLS LOOP_TRIPCOUNT min=6001 max=6001 avg=6001 53 | #endif 54 | 55 | // Read from streams (single cycle operation) 56 | data_t xcorr_sample = xcorr_stream.read(); 57 | data_t threshold_sample = threshold_stream.read(); 58 | 59 | // Optimized shift register using unrolled operations 60 | #ifdef __SYNTHESIS__ 61 | #pragma HLS UNROLL 62 | #endif 63 | shift_registers: for (int i = WINDOW_LENGTH - 1; i > 0; i--) { 64 | xcorr_window[i] = xcorr_window[i-1]; 65 | threshold_window[i] = threshold_window[i-1]; 66 | } 67 | xcorr_window[0] = xcorr_sample; 68 | threshold_window[0] = threshold_sample; 69 | 70 | // Peak detection logic (only after window is filled) 71 | if (idx >= WINDOW_LENGTH - 1) { 72 | // Get middle sample 73 | data_t mid_xcorr = xcorr_window[MIDDLE_LOCATION]; 74 | data_t mid_threshold = threshold_window[MIDDLE_LOCATION]; 75 | 76 | // Threshold check 77 | bool above_threshold = (mid_xcorr > mid_threshold); 78 | 79 | // Parallel peak comparison using unrolled loop 80 | bool is_local_max = true; 81 | #ifdef __SYNTHESIS__ 82 | #pragma HLS UNROLL 83 | #endif 84 | peak_comparison: for (int i = 0; i < WINDOW_LENGTH; i++) { 85 | if (i != MIDDLE_LOCATION && xcorr_window[i] >= mid_xcorr) { 86 | is_local_max = false; 87 | } 88 | } 89 | 90 | // Output peak location if detected 91 | if (is_local_max && above_threshold && peak_count < MAX_PEAKS) { 92 | index_t peak_location = idx - MIDDLE_LOCATION + 1; // MATLAB 1-indexed 93 | locations_stream.write(peak_location); 94 | peak_count++; 95 | } 96 | } 97 | } 98 | 99 | *num_peaks = peak_count; 100 | } 101 | 102 | // Wrapper function for backward compatibility with array interface 103 | void peakPicker_wrapper( 104 | data_t xcorr[MAX_INPUT_SIZE], 105 | data_t threshold[MAX_INPUT_SIZE], 106 | index_t input_length, 107 | index_t locations[MAX_PEAKS], 108 | index_t* num_peaks 109 | ) { 110 | #ifdef __SYNTHESIS__ 111 | // Interface pragmas for wrapper 112 | #pragma HLS INTERFACE m_axi port=xcorr offset=slave bundle=gmem0 113 | #pragma HLS INTERFACE m_axi port=threshold offset=slave bundle=gmem1 114 | #pragma HLS INTERFACE m_axi port=locations offset=slave bundle=gmem2 115 | #pragma HLS INTERFACE m_axi port=num_peaks offset=slave bundle=gmem3 116 | #pragma HLS INTERFACE s_axilite port=input_length bundle=control 117 | #pragma HLS INTERFACE s_axilite port=return bundle=control 118 | 119 | #pragma HLS DATAFLOW 120 | #endif 121 | 122 | // Create streams 123 | static hls::stream xcorr_stream("xcorr_stream"); 124 | static hls::stream threshold_stream("threshold_stream"); 125 | static hls::stream locations_stream("locations_stream"); 126 | 127 | #ifdef __SYNTHESIS__ 128 | #pragma HLS STREAM variable=xcorr_stream depth=2 129 | #pragma HLS STREAM variable=threshold_stream depth=2 130 | #pragma HLS STREAM variable=locations_stream depth=100 131 | #endif 132 | 133 | // Feed input streams 134 | input_feeder: for (index_t i = 0; i < input_length; i++) { 135 | #ifdef __SYNTHESIS__ 136 | #pragma HLS PIPELINE II=1 137 | #endif 138 | xcorr_stream.write(xcorr[i]); 139 | threshold_stream.write(threshold[i]); 140 | } 141 | 142 | // Call optimized core function 143 | index_t temp_num_peaks; 144 | peakPicker(xcorr_stream, threshold_stream, input_length, locations_stream, &temp_num_peaks); 145 | 146 | // Read output stream 147 | output_collector: for (index_t i = 0; i < temp_num_peaks && i < MAX_PEAKS; i++) { 148 | #ifdef __SYNTHESIS__ 149 | #pragma HLS PIPELINE II=1 150 | #endif 151 | locations[i] = locations_stream.read(); 152 | } 153 | 154 | *num_peaks = temp_num_peaks; 155 | } -------------------------------------------------------------------------------- /prompts/documentation_template.md: -------------------------------------------------------------------------------- 1 | # General Documentation Template 2 | 3 | ## Context 4 | You are tasked with creating comprehensive documentation for an FPGA hardware accelerator design. This documentation should be appropriate for technical users who need to understand, use, or modify this hardware component. 5 | 6 | ## Component Information 7 | - **Component Name**: {component_name} 8 | - **Design Purpose**: [Extract from context] 9 | - **Generation Method**: AI-assisted design using LLM 10 | - **Target Platform**: Xilinx FPGA 11 | 12 | ## Documentation Structure 13 | 14 | Create thorough documentation following this structure: 15 | 16 | ### 1. Overview 17 | - Component purpose and functionality 18 | - Key features 19 | - Target applications 20 | - Design approach 21 | 22 | ### 2. Architecture 23 | - Block diagram description 24 | - Interface specification 25 | - Data flow 26 | - Key components 27 | 28 | **Architecture Visualization**: Include a Mermaid diagram showing the component architecture. Example: 29 | 30 | ```mermaid 31 | flowchart TD 32 | A["Input Interface"] --> B["Core Processing"] 33 | B --> C["Output Interface"] 34 | D["Control Logic"] --> B 35 | E["Memory"] <--> B 36 | ``` 37 | 38 | ### 3. Implementation Details 39 | - Algorithm description 40 | - HLS optimizations 41 | - Resource utilization 42 | - Performance characteristics 43 | 44 | **Algorithm Visualization**: Include appropriate diagrams showing the algorithm implementation: 45 | 46 | ```mermaid 47 | flowchart LR 48 | A["Data Input"] --> B["Stage 1"] 49 | B --> C["Stage 2"] 50 | C --> D["Stage 3"] 51 | D --> E["Data Output"] 52 | 53 | F["Configuration"] --> B 54 | F --> C 55 | F --> D 56 | ``` 57 | 58 | ### 4. Usage Guide 59 | - Integration instructions 60 | - API/interface description 61 | - Example usage 62 | - Configuration options 63 | 64 | **Integration Visualization**: Show integration flow with other components: 65 | 66 | ```mermaid 67 | flowchart LR 68 | A["Host System"] --> B["Driver"] 69 | B --> C["{component_name}"] 70 | C --> D["Memory"] 71 | C --> E["Other IP Cores"] 72 | ``` 73 | 74 | ### 5. Performance Analysis 75 | - Latency and throughput 76 | - Resource efficiency 77 | - Comparative metrics 78 | - Limitations 79 | 80 | **Performance Visualization**: Include resource utilization tables and charts: 81 | 82 | | Resource | Utilization | Available | Utilization % | 83 | |----------|-------------|-----------|---------------| 84 | | LUT | X | X_total | X_percent | 85 | | FF | Y | Y_total | Y_percent | 86 | | DSP | Z | Z_total | Z_percent | 87 | | BRAM | W | W_total | W_percent | 88 | 89 | 90 | Include performance metrics (Timing & Latency) in well-formatted tables: 91 | 92 | ## Timing 93 | 94 | | Implementation | Target (ns) | Target (MHz) | Post-Synthesis (ns) | Post-Synthesis (MHz) | Post-Route (ns) | Post-Route (MHz) | 95 | |---------------|------------|-------------|-------------------|---------------------|----------------|----------------| 96 | | solution1 | 3.90 | 256.00 | u.uu | vvv.vv | x.xx | yyy.yy | 97 | 98 | ## Latency 99 | 100 | | Implementation | Min (cycles) | Max (cycles) | Average (cycles) | Throughput (samples/cycle) | 101 | |---------------|-------------|-------------|-----------------|-----------------------------| 102 | | solution1 | X | Y | - | - | 103 | 104 | 105 | 106 | ### 6. Verification 107 | - Test methodology 108 | - Simulation results 109 | - Validation approach 110 | - Known issues 111 | 112 | **Verification Visualization**: Use sequence diagrams to show test procedures: 113 | 114 | ```mermaid 115 | sequenceDiagram 116 | participant TB as Testbench 117 | participant DUT as Design Under Test 118 | 119 | TB->>DUT: Initialize 120 | TB->>DUT: Apply Test Vector 1 121 | DUT-->>TB: Output Results 1 122 | TB->>TB: Verify Results 1 123 | TB->>DUT: Apply Test Vector 2 124 | DUT-->>TB: Output Results 2 125 | TB->>TB: Verify Results 2 126 | ``` 127 | 128 | ### 7. Development Process 129 | - Design evolution 130 | - Challenges and solutions 131 | - AI assistance insights 132 | - Optimization history 133 | 134 | **Development Visualization**: Use a state or flowchart diagram to show the design evolution: 135 | 136 | ```mermaid 137 | stateDiagram-v2 138 | [*] --> Initial_Design 139 | Initial_Design --> Functional_Implementation 140 | Functional_Implementation --> Performance_Optimization 141 | Performance_Optimization --> Resource_Optimization 142 | Resource_Optimization --> Final_Design 143 | Final_Design --> [*] 144 | ``` 145 | 146 | ## Source Information 147 | Use the following source material to inform your documentation: 148 | 149 | ### Source Code 150 | ```cpp 151 | // Header file 152 | {header_code} 153 | ``` 154 | 155 | ```cpp 156 | // Implementation file 157 | {implementation_code} 158 | ``` 159 | 160 | ```cpp 161 | // Testbench file 162 | {testbench_code} 163 | ``` 164 | 165 | ### Performance Metrics 166 | {performance_metrics} 167 | 168 | ### Implementation Challenges 169 | {errors_encountered} 170 | 171 | ### Debugging Methods 172 | {debugging_methods} 173 | 174 | ## Diagram Examples 175 | The following are examples of different types of Mermaid diagrams you can use: 176 | 177 | {diagram_examples} 178 | 179 | ## Chart Examples 180 | The following are examples of different types of tables/charts for performance data: 181 | 182 | {chart_examples} 183 | 184 | ## Style Guidelines 185 | - Use clear, professional language 186 | - Include code snippets and examples where helpful 187 | - Use proper technical terminology 188 | - Be concise but thorough 189 | - Use appropriate markdown formatting 190 | - Focus on practical usage information 191 | - Make effective use of diagrams and visualizations 192 | - Use Mermaid diagrams for architecture, flows, and algorithms 193 | - Present performance metrics in well-formatted tables 194 | - Ensure diagrams have clear labels and descriptions 195 | 196 | Your documentation should serve as a comprehensive reference for this hardware component, with visual elements that enhance understanding. 197 | -------------------------------------------------------------------------------- /CLAUDE.md: -------------------------------------------------------------------------------- 1 | # CLAUDE.md 2 | 3 | This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. 4 | 5 | ## Project Overview 6 | 7 | This repository implements an LLM-aided FPGA design flow that converts MATLAB algorithms to optimized HLS C++ implementations. The primary focus is on automating the design workflow using Large Language Models (Claude, GPT-4, Gemini) for 5G NR signal processing components, specifically peak picking algorithms for SSB detection. 8 | 9 | ## Key Architecture Components 10 | 11 | ### 1. Multi-Stage LLM Pipeline 12 | - **Code Generation**: MATLAB → HLS C++ conversion using structured prompts 13 | - **Automated Debugging**: AI-powered error analysis and code correction 14 | - **Agent Framework**: Orchestrates multiple LLM services with fallback mechanisms 15 | 16 | ### 2. Core Scripts (scripts/) 17 | - `generate_hls_code.py`: Main code generation from MATLAB to HLS C++ 18 | - `debug_assistant.py`: LLM-based debugging of C simulation errors 19 | - `agent_framework.py`: Agent orchestration and prompt management 20 | - Support for Gemini (primary), OpenAI, and Claude APIs with automatic fallback 21 | 22 | ### 3. Prompt Engineering System (prompts/) 23 | - Structured templates for different tasks (hls_conversion.md, hls_debugging.md, etc.) 24 | - Domain-specific prompts for 5G signal processing 25 | - Performance optimization and documentation generation templates 26 | 27 | ## Common Development Commands 28 | 29 | ### HLS Project Development (in implementations/peakPicker/) 30 | ```bash 31 | # Full development cycle 32 | make all # Clean, build testbench, run C sim and synthesis 33 | 34 | # Individual stages 35 | make tb # Build and run standalone C++ testbench 36 | make csim # Run HLS C simulation 37 | make csynth # Run HLS C synthesis 38 | make cosim # Run C/RTL co-simulation 39 | make export_ip # Export as IP catalog 40 | make impl # Run Vivado implementation 41 | make clean # Clean generated files 42 | make help # Show all available targets 43 | ``` 44 | 45 | ### Code Generation Workflow 46 | ```bash 47 | # Generate HLS from MATLAB (from repository root) 48 | python3 scripts/generate_hls_code.py \ 49 | --matlab_file algorithms/peakPicker.m algorithms/peakPicker_tb.m \ 50 | --prompt prompts/hls_conversion.md \ 51 | --model gemini-2.0-flash-thinking-exp 52 | 53 | # Debug C simulation errors 54 | python3 scripts/debug_assistant.py \ 55 | --error_log implementations/peakPicker/proj_peakPicker/solution1/csim/report/peakPicker_csim.log \ 56 | --source_file implementations/peakPicker/peakPicker.cpp implementations/peakPicker/peakPicker.hpp implementations/peakPicker/peakPicker_tb.cpp 57 | ``` 58 | 59 | ### Environment Setup 60 | ```bash 61 | # Required: Set Vitis HLS path 62 | export VITIS_HLS_PATH=/opt/Xilinx/Vitis_HLS/2023.2 63 | source $VITIS_HLS_PATH/settings64.sh 64 | 65 | # Required: Set at least one API key 66 | export GEMINI_API_KEY=your_key_here 67 | # Optional alternatives: 68 | export OPENAI_API_KEY=your_key_here 69 | export CLAUDE_API_KEY=your_key_here 70 | 71 | # Install Python dependencies 72 | pip install -r requirements.txt 73 | ``` 74 | 75 | ## Hardware Configuration 76 | 77 | - **Target FPGA**: xc7k410t-ffg900-2 (Kintex-7) 78 | - **Clock Frequency**: 256MHz (3.9ns period) 79 | - **Clock Uncertainty**: 12.5% 80 | - **HLS Version**: Vitis HLS 2023.2 81 | 82 | ## LLM Model Selection 83 | 84 | ### Supported Models and Use Cases 85 | - **gemini-2.0-flash-thinking-exp**: Fast iterations, general debugging 86 | - **gemini-2.0-pro-exp**: Complex algorithm conversion (default) 87 | - **gpt-4**: Detailed implementations requiring careful analysis 88 | - **gpt-3.5-turbo**: Quick prototyping and simple conversions 89 | - **claude-sonnet**: Algorithm explanations and documentation 90 | 91 | ### API Fallback Order 92 | 1. Gemini (primary) - good code reasoning and HLS optimization 93 | 2. OpenAI - comprehensive code generation 94 | 3. Claude - detailed algorithmic understanding 95 | 96 | ## File Organization Patterns 97 | 98 | ### Implementation Structure 99 | ``` 100 | implementations/ 101 | ├── peakPicker/ # Component-specific directory 102 | │ ├── Makefile # HLS build automation 103 | │ ├── peakPicker.cpp/.hpp # Generated HLS implementation 104 | │ ├── peakPicker_tb.cpp # Generated testbench 105 | │ └── documentation/ # Auto-generated docs 106 | ``` 107 | 108 | ### Data and Testing 109 | - `data/`: Test vectors and reference data (automatically included in HLS builds) 110 | - `algorithms/`: MATLAB reference implementations 111 | - Test data files are automatically detected and added to HLS projects 112 | 113 | ## Key Implementation Notes 114 | 115 | ### HLS-Specific Considerations 116 | - All implementations target ap_int<> and ap_fixed<> data types for optimal resource usage 117 | - Testbenches automatically load data from `../../data/` directory 118 | - HLS pragmas are used for performance optimization (PIPELINE, UNROLL, ARRAY_PARTITION) 119 | - Interface synthesis uses ap_ctrl_hs with AXI4-Stream for data 120 | 121 | ### Code Generation Process 122 | 1. Parse MATLAB reference algorithms 123 | 2. Apply domain-specific prompts (5G signal processing context) 124 | 3. Generate three files: header (.hpp), implementation (.cpp), testbench (_tb.cpp) 125 | 4. Automatic verification through C simulation 126 | 5. AI-powered debugging if errors occur 127 | 128 | ### Error Handling and Debugging 129 | - Automatic error log parsing and analysis 130 | - LLM generates detailed debug reports with specific fixes 131 | - Support for interface mismatches, data type issues, and algorithmic errors 132 | - Debug reports saved to `debug_reports/` with timestamps 133 | 134 | ## Working with Prompts 135 | 136 | ### Prompt Template Structure 137 | - **Context Section**: Algorithm purpose and background 138 | - **Task Description**: Clear implementation requirements 139 | - **Technical Requirements**: HLS-specific coding standards 140 | - **Deliverables**: Expected output files and format 141 | 142 | ### Prompt Backup System 143 | - All prompts are automatically backed up with timestamps in `prompts/backups/` 144 | - Version control for prompt evolution and A/B testing 145 | - Performance metrics tracking for prompt effectiveness -------------------------------------------------------------------------------- /example/peakPicker_tb.cpp: -------------------------------------------------------------------------------- 1 | #include "peakPicker.hpp" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | using namespace std; 9 | 10 | // Function to read data from file 11 | vector readDataFromFile(const string& filename) { 12 | vector data; 13 | ifstream file(filename); 14 | 15 | if (!file.is_open()) { 16 | cerr << "Error: Could not open file " << filename << endl; 17 | return data; 18 | } 19 | 20 | double value; 21 | while (file >> value) { 22 | data.push_back(value); 23 | } 24 | 25 | file.close(); 26 | cout << "Read " << data.size() << " values from " << filename << endl; 27 | return data; 28 | } 29 | 30 | // Function to read reference locations 31 | vector readReferenceLocations(const string& filename) { 32 | vector locations; 33 | ifstream file(filename); 34 | 35 | if (!file.is_open()) { 36 | cerr << "Error: Could not open file " << filename << endl; 37 | return locations; 38 | } 39 | 40 | int value; 41 | while (file >> value) { 42 | locations.push_back(value); 43 | } 44 | 45 | file.close(); 46 | cout << "Read " << locations.size() << " reference locations from " << filename << endl; 47 | return locations; 48 | } 49 | 50 | // Function to write results to file 51 | void writeResultsToFile(const string& filename, const vector& locations) { 52 | ofstream file(filename); 53 | 54 | if (!file.is_open()) { 55 | cerr << "Error: Could not create file " << filename << endl; 56 | return; 57 | } 58 | 59 | for (size_t i = 0; i < locations.size(); i++) { 60 | file << locations[i]; 61 | if (i < locations.size() - 1) { 62 | file << "\t"; 63 | } 64 | } 65 | file << endl; 66 | 67 | file.close(); 68 | cout << "Written " << locations.size() << " locations to " << filename << endl; 69 | } 70 | 71 | int main() { 72 | cout << "=== Peak Picker HLS Testbench ===" << endl; 73 | 74 | // Read input data 75 | vector xcorr_data = readDataFromFile("pssCorrMagSq_3_in.txt"); 76 | vector threshold_data = readDataFromFile("threshold_in.txt"); 77 | vector ref_locations = readReferenceLocations("locations_3_ref.txt"); 78 | 79 | if (xcorr_data.empty() || threshold_data.empty()) { 80 | cerr << "Error: Failed to read input data files" << endl; 81 | return -1; 82 | } 83 | 84 | if (xcorr_data.size() != threshold_data.size()) { 85 | cerr << "Error: Input data size mismatch" << endl; 86 | return -1; 87 | } 88 | 89 | cout << "Input data size: " << xcorr_data.size() << " samples" << endl; 90 | 91 | // Prepare data for HLS function 92 | static data_t xcorr[MAX_INPUT_SIZE]; 93 | static data_t threshold[MAX_INPUT_SIZE]; 94 | static index_t locations[MAX_PEAKS]; 95 | index_t num_peaks = 0; 96 | 97 | // Convert input data to fixed-point 98 | index_t input_length = min((size_t)MAX_INPUT_SIZE, xcorr_data.size()); 99 | 100 | for (index_t i = 0; i < input_length; i++) { 101 | xcorr[i] = (data_t)xcorr_data[i]; 102 | threshold[i] = (data_t)threshold_data[i]; 103 | } 104 | 105 | // Initialize output array 106 | for (int i = 0; i < MAX_PEAKS; i++) { 107 | locations[i] = 0; 108 | } 109 | 110 | cout << "Calling peakPicker function..." << endl; 111 | 112 | // Call the HLS function 113 | peakPicker(xcorr, threshold, input_length, locations, &num_peaks); 114 | 115 | cout << "Peak detection completed. Found " << num_peaks << " peaks." << endl; 116 | 117 | // Convert results to vector for easier handling 118 | vector detected_locations; 119 | for (index_t i = 0; i < num_peaks; i++) { 120 | detected_locations.push_back((int)locations[i]); 121 | } 122 | 123 | // Write results to file 124 | writeResultsToFile("peakLocs_out.txt", detected_locations); 125 | 126 | // Compare with reference 127 | cout << "\n=== Results Comparison ===" << endl; 128 | cout << "Detected peaks: " << detected_locations.size() << endl; 129 | cout << "Reference peaks: " << ref_locations.size() << endl; 130 | 131 | if (detected_locations.size() != ref_locations.size()) { 132 | cout << "WARNING: Different number of peaks detected!" << endl; 133 | } 134 | 135 | // Print detected locations 136 | cout << "\nDetected peak locations: "; 137 | for (size_t i = 0; i < detected_locations.size(); i++) { 138 | cout << detected_locations[i]; 139 | if (i < detected_locations.size() - 1) cout << ", "; 140 | } 141 | cout << endl; 142 | 143 | // Print reference locations 144 | cout << "Reference peak locations: "; 145 | for (size_t i = 0; i < ref_locations.size(); i++) { 146 | cout << ref_locations[i]; 147 | if (i < ref_locations.size() - 1) cout << ", "; 148 | } 149 | cout << endl; 150 | 151 | // Check if results match 152 | bool results_match = true; 153 | if (detected_locations.size() == ref_locations.size()) { 154 | for (size_t i = 0; i < detected_locations.size(); i++) { 155 | if (detected_locations[i] != ref_locations[i]) { 156 | results_match = false; 157 | break; 158 | } 159 | } 160 | } else { 161 | results_match = false; 162 | } 163 | 164 | cout << "\n=== Test Result ===" << endl; 165 | if (results_match) { 166 | cout << "✓ TEST PASSED: Output matches reference" << endl; 167 | return 0; 168 | } else { 169 | cout << "✗ TEST FAILED: Output does not match reference" << endl; 170 | 171 | // Calculate error metrics if sizes match 172 | if (detected_locations.size() == ref_locations.size() && !detected_locations.empty()) { 173 | double total_abs_error = 0; 174 | double max_abs_error = 0; 175 | 176 | for (size_t i = 0; i < detected_locations.size(); i++) { 177 | double abs_error = abs(detected_locations[i] - ref_locations[i]); 178 | total_abs_error += abs_error; 179 | max_abs_error = max(max_abs_error, abs_error); 180 | } 181 | 182 | double mean_abs_error = total_abs_error / detected_locations.size(); 183 | 184 | cout << "Error Analysis:" << endl; 185 | cout << " Mean absolute error: " << mean_abs_error << endl; 186 | cout << " Maximum absolute error: " << max_abs_error << endl; 187 | } 188 | 189 | return 1; 190 | } 191 | } -------------------------------------------------------------------------------- /example/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for HLS Project 2 | 3 | # Set the design name 4 | DESIGN_NAME = peakPicker 5 | 6 | # Configuration variables 7 | CSIM = 1 8 | CSYNTH = 1 9 | COSIM = 1 10 | EXPORT_IP = 1 11 | VIVADO_IMPL = 1 12 | 13 | # Hardware configuration 14 | CLOCK_FREQ = 256 15 | FPGA_PART = xc7k410t-ffg900-2 16 | CLOCK_UNCERTAINTY = 12.5 17 | 18 | # Vitis HLS installation path - modify this to match your installation 19 | VITIS_HLS_PATH ?= /opt/Xilinx/Vitis_HLS/2024.2 20 | # HLS compiler and flags - use full path to vitis_hls executable 21 | HLS = $(VITIS_HLS_PATH)/bin/vitis_hls 22 | 23 | # You can also set VITIS_HLS_PATH via environment variable before running make: 24 | # export VITIS_HLS_PATH=/path/to/your/Vitis_HLS/installation 25 | 26 | # Optional: Uncomment to source Vitis HLS settings before each HLS command 27 | # HLS = source $(VITIS_HLS_PATH)/settings64.sh && $(VITIS_HLS_PATH)/bin/vitis_hls 28 | 29 | HLS_PROJECT = proj_$(DESIGN_NAME) 30 | HLS_SOLUTION = solution1 31 | 32 | # C++ compiler and flags for testbench 33 | CXX = g++ 34 | CXXFLAGS = -Wall -Wextra -O2 -std=c++17 35 | INCLUDES = -I$(XILINX_HLS)/include 36 | 37 | # Source files 38 | SRC_FILES = $(DESIGN_NAME).cpp 39 | TB_FILES = $(DESIGN_NAME)_tb.cpp 40 | TEST_DATA_DIR = . 41 | # Get a list of all text files in the data directory 42 | TEST_DATA_FILES := $(wildcard $(TEST_DATA_DIR)/*.txt) 43 | 44 | # Target names 45 | TB_EXE = $(DESIGN_NAME)_test 46 | CSIM_TCL = csim.tcl 47 | CSYNTH_TCL = csynth.tcl 48 | COSIM_TCL = cosim.tcl 49 | EXPORT_TCL = export.tcl 50 | IMPL_TCL = impl.tcl 51 | 52 | # Calculate clock period in ns from MHz 53 | CLOCK_PERIOD := $(shell echo "scale=2; 1000 / $(CLOCK_FREQ)" | bc) 54 | 55 | .PHONY: all clean tb csim csynth cosim export_ip impl help 56 | 57 | all: clean tb csim csynth 58 | 59 | # Standalone testbench using GCC 60 | tb: 61 | @echo "Building standalone testbench..." 62 | $(CXX) $(CXXFLAGS) $(INCLUDES) $(SRC_FILES) $(TB_FILES) -o $(TB_EXE) 63 | @echo "Running standalone testbench..." 64 | ./$(TB_EXE) 65 | 66 | # HLS C Simulation 67 | csim: 68 | ifeq ($(CSIM), 1) 69 | @echo "Running HLS C Simulation..." 70 | @echo "open_project $(HLS_PROJECT)" > $(CSIM_TCL) 71 | @echo "set_top peakPicker" >> $(CSIM_TCL) 72 | @echo "add_files $(SRC_FILES)" >> $(CSIM_TCL) 73 | @echo "add_files -tb $(TB_FILES)" >> $(CSIM_TCL) 74 | @for file in $(TEST_DATA_FILES); do \ 75 | echo "add_files -tb $$file" >> $(CSIM_TCL); \ 76 | done 77 | @echo "open_solution $(HLS_SOLUTION)" >> $(CSIM_TCL) 78 | @echo "set_part {$(FPGA_PART)}" >> $(CSIM_TCL) 79 | @echo "csim_design" >> $(CSIM_TCL) 80 | @echo "exit" >> $(CSIM_TCL) 81 | $(HLS) -f $(CSIM_TCL) 82 | endif 83 | 84 | # HLS C Synthesis 85 | csynth: 86 | ifeq ($(CSYNTH), 1) 87 | @echo "Running HLS C Synthesis..." 88 | @echo "open_project $(HLS_PROJECT)" > $(CSYNTH_TCL) 89 | @echo "set_top peakPicker" >> $(CSYNTH_TCL) 90 | @echo "add_files $(SRC_FILES)" >> $(CSYNTH_TCL) 91 | @echo "add_files -tb $(TB_FILES)" >> $(CSYNTH_TCL) 92 | @for file in $(TEST_DATA_FILES); do \ 93 | echo "add_files -tb $$file" >> $(CSYNTH_TCL); \ 94 | done 95 | @echo "open_solution $(HLS_SOLUTION)" >> $(CSYNTH_TCL) 96 | @echo "set_part {$(FPGA_PART)}" >> $(CSYNTH_TCL) 97 | @echo "create_clock -period $(CLOCK_PERIOD) -name default" >> $(CSYNTH_TCL) 98 | @echo "set_clock_uncertainty $(CLOCK_UNCERTAINTY)%" >> $(CSYNTH_TCL) 99 | @echo "csynth_design" >> $(CSYNTH_TCL) 100 | @echo "exit" >> $(CSYNTH_TCL) 101 | $(HLS) -f $(CSYNTH_TCL) 102 | endif 103 | 104 | # HLS C/RTL Co-simulation 105 | cosim: 106 | ifeq ($(COSIM), 1) 107 | @echo "Running HLS C/RTL Co-simulation..." 108 | @echo "open_project $(HLS_PROJECT)" > $(COSIM_TCL) 109 | @echo "set_top peakPicker" >> $(COSIM_TCL) 110 | @echo "add_files $(SRC_FILES)" >> $(COSIM_TCL) 111 | @echo "add_files -tb $(TB_FILES)" >> $(COSIM_TCL) 112 | @for file in $(TEST_DATA_FILES); do \ 113 | echo "add_files -tb $$file" >> $(COSIM_TCL); \ 114 | done 115 | @echo "open_solution $(HLS_SOLUTION)" >> $(COSIM_TCL) 116 | @echo "set_part {$(FPGA_PART)}" >> $(COSIM_TCL) 117 | @echo "create_clock -period $(CLOCK_PERIOD) -name default" >> $(COSIM_TCL) 118 | @echo "set_clock_uncertainty $(CLOCK_UNCERTAINTY)%" >> $(COSIM_TCL) 119 | @echo "cosim_design" >> $(COSIM_TCL) 120 | @echo "exit" >> $(COSIM_TCL) 121 | $(HLS) -f $(COSIM_TCL) 122 | endif 123 | 124 | # Export RTL as IP 125 | export_ip: 126 | ifeq ($(EXPORT_IP), 1) 127 | @echo "Exporting IP..." 128 | @echo "open_project $(HLS_PROJECT)" > $(EXPORT_TCL) 129 | @echo "set_top peakPicker" >> $(EXPORT_TCL) 130 | @echo "add_files $(SRC_FILES)" >> $(EXPORT_TCL) 131 | @echo "add_files -tb $(TB_FILES)" >> $(EXPORT_TCL) 132 | @for file in $(TEST_DATA_FILES); do \ 133 | echo "add_files -tb $$file" >> $(EXPORT_TCL); \ 134 | done 135 | @echo "open_solution $(HLS_SOLUTION)" >> $(EXPORT_TCL) 136 | @echo "set_part {$(FPGA_PART)}" >> $(EXPORT_TCL) 137 | @echo "create_clock -period $(CLOCK_PERIOD) -name default" >> $(EXPORT_TCL) 138 | @echo "set_clock_uncertainty $(CLOCK_UNCERTAINTY)%" >> $(EXPORT_TCL) 139 | @echo "export_design -format ip_catalog" >> $(EXPORT_TCL) 140 | @echo "exit" >> $(EXPORT_TCL) 141 | $(HLS) -f $(EXPORT_TCL) 142 | endif 143 | 144 | # Run Implementation in Vivado 145 | impl: 146 | ifeq ($(VIVADO_IMPL), 1) 147 | @echo "Running Vivado Implementation..." 148 | @echo "open_project $(HLS_PROJECT)" > $(IMPL_TCL) 149 | @echo "set_top peakPicker" >> $(IMPL_TCL) 150 | @echo "add_files $(SRC_FILES)" >> $(IMPL_TCL) 151 | @echo "add_files -tb $(TB_FILES)" >> $(IMPL_TCL) 152 | @for file in $(TEST_DATA_FILES); do \ 153 | echo "add_files -tb $$file" >> $(IMPL_TCL); \ 154 | done 155 | @echo "open_solution $(HLS_SOLUTION)" >> $(IMPL_TCL) 156 | @echo "set_part {$(FPGA_PART)}" >> $(IMPL_TCL) 157 | @echo "create_clock -period $(CLOCK_PERIOD) -name default" >> $(IMPL_TCL) 158 | @echo "set_clock_uncertainty $(CLOCK_UNCERTAINTY)%" >> $(IMPL_TCL) 159 | @echo "export_design -flow impl" >> $(IMPL_TCL) 160 | @echo "exit" >> $(IMPL_TCL) 161 | $(HLS) -f $(IMPL_TCL) 162 | endif 163 | 164 | # Clean up 165 | clean: 166 | @echo "Cleaning up..." 167 | rm -rf $(HLS_PROJECT) *.dat *.log *.tcl $(TB_EXE) *.o *.out 168 | 169 | # Help information 170 | help: 171 | @echo "Makefile for HLS Peak Picker Project" 172 | @echo "" 173 | @echo "Targets:" 174 | @echo " all : Run clean, testbench, C simulation, and C synthesis" 175 | @echo " tb : Build and run standalone testbench" 176 | @echo " csim : Run HLS C simulation" 177 | @echo " csynth : Run HLS C synthesis" 178 | @echo " cosim : Run HLS C/RTL co-simulation" 179 | @echo " export_ip : Export RTL as IP catalog" 180 | @echo " impl : Run implementation in Vivado" 181 | @echo " clean : Clean up generated files" 182 | @echo " help : Display this help information" 183 | @echo "" 184 | @echo "Configuration (set to 1 to enable, 0 to disable):" 185 | @echo " CSIM = $(CSIM) (C simulation)" 186 | @echo " CSYNTH = $(CSYNTH) (C synthesis)" 187 | @echo " COSIM = $(COSIM) (C/RTL co-simulation)" 188 | @echo " EXPORT_IP = $(EXPORT_IP) (Export IP)" 189 | @echo " VIVADO_IMPL= $(VIVADO_IMPL) (Vivado implementation)" 190 | @echo "" 191 | @echo "Hardware Configuration:" 192 | @echo " CLOCK_FREQ = $(CLOCK_FREQ)MHz (Clock frequency)" 193 | @echo " CLOCK_PERIOD = $(CLOCK_PERIOD)ns (Clock period)" 194 | @echo " FPGA_PART = $(FPGA_PART) (FPGA part)" 195 | @echo " CLOCK_UNCERTAINTY = $(CLOCK_UNCERTAINTY)% (Clock uncertainty)" 196 | -------------------------------------------------------------------------------- /scripts/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for HLS Project 2 | 3 | # Set the design name 4 | DESIGN_NAME = peakPicker 5 | 6 | # Configuration variables 7 | CSIM = 1 8 | CSYNTH = 1 9 | COSIM = 1 10 | EXPORT_IP = 1 11 | VIVADO_IMPL = 1 12 | 13 | # Hardware configuration 14 | CLOCK_FREQ = 256 15 | FPGA_PART = xc7k410t-ffg900-2 16 | CLOCK_UNCERTAINTY = 12.5 17 | 18 | # Vitis HLS installation path - modify this to match your installation 19 | VITIS_HLS_PATH ?= /opt/Xilinx/Vitis_HLS/2023.2 20 | # HLS compiler and flags - use full path to vitis_hls executable 21 | HLS = $(VITIS_HLS_PATH)/bin/vitis_hls 22 | 23 | # You can also set VITIS_HLS_PATH via environment variable before running make: 24 | # export VITIS_HLS_PATH=/path/to/your/Vitis_HLS/installation 25 | 26 | # Optional: Uncomment to source Vitis HLS settings before each HLS command 27 | # HLS = source $(VITIS_HLS_PATH)/settings64.sh && $(VITIS_HLS_PATH)/bin/vitis_hls 28 | 29 | HLS_PROJECT = proj_$(DESIGN_NAME) 30 | HLS_SOLUTION = solution1 31 | 32 | # C++ compiler and flags for testbench 33 | CXX = g++ 34 | CXXFLAGS = -Wall -Wextra -O2 -std=c++17 35 | INCLUDES = -I$(XILINX_HLS)/include 36 | 37 | # Source files 38 | SRC_FILES = $(DESIGN_NAME).cpp 39 | TB_FILES = $(DESIGN_NAME)_tb.cpp 40 | TEST_DATA_DIR = ../../data 41 | # Get a list of all text files in the data directory 42 | TEST_DATA_FILES := $(wildcard $(TEST_DATA_DIR)/*.txt) 43 | 44 | # Target names 45 | TB_EXE = $(DESIGN_NAME)_test 46 | CSIM_TCL = csim.tcl 47 | CSYNTH_TCL = csynth.tcl 48 | COSIM_TCL = cosim.tcl 49 | EXPORT_TCL = export.tcl 50 | IMPL_TCL = impl.tcl 51 | 52 | # Calculate clock period in ns from MHz 53 | CLOCK_PERIOD := $(shell echo "scale=2; 1000 / $(CLOCK_FREQ)" | bc) 54 | 55 | .PHONY: all clean tb csim csynth cosim export_ip impl help 56 | 57 | all: clean tb csim csynth 58 | 59 | # Standalone testbench using GCC 60 | tb: 61 | @echo "Building standalone testbench..." 62 | $(CXX) $(CXXFLAGS) $(INCLUDES) $(SRC_FILES) $(TB_FILES) -o $(TB_EXE) 63 | @echo "Running standalone testbench..." 64 | ./$(TB_EXE) 65 | 66 | # HLS C Simulation 67 | csim: 68 | ifeq ($(CSIM), 1) 69 | @echo "Running HLS C Simulation..." 70 | @echo "open_project $(HLS_PROJECT)" > $(CSIM_TCL) 71 | @echo "set_top peakPicker" >> $(CSIM_TCL) 72 | @echo "add_files $(SRC_FILES)" >> $(CSIM_TCL) 73 | @echo "add_files -tb $(TB_FILES)" >> $(CSIM_TCL) 74 | @for file in $(TEST_DATA_FILES); do \ 75 | echo "add_files -tb $$file" >> $(CSIM_TCL); \ 76 | done 77 | @echo "open_solution $(HLS_SOLUTION)" >> $(CSIM_TCL) 78 | @echo "set_part {$(FPGA_PART)}" >> $(CSIM_TCL) 79 | @echo "csim_design" >> $(CSIM_TCL) 80 | @echo "exit" >> $(CSIM_TCL) 81 | $(HLS) -f $(CSIM_TCL) 82 | endif 83 | 84 | # HLS C Synthesis 85 | csynth: 86 | ifeq ($(CSYNTH), 1) 87 | @echo "Running HLS C Synthesis..." 88 | @echo "open_project $(HLS_PROJECT)" > $(CSYNTH_TCL) 89 | @echo "set_top peakPicker" >> $(CSYNTH_TCL) 90 | @echo "add_files $(SRC_FILES)" >> $(CSYNTH_TCL) 91 | @echo "add_files -tb $(TB_FILES)" >> $(CSYNTH_TCL) 92 | @for file in $(TEST_DATA_FILES); do \ 93 | echo "add_files -tb $$file" >> $(CSYNTH_TCL); \ 94 | done 95 | @echo "open_solution $(HLS_SOLUTION)" >> $(CSYNTH_TCL) 96 | @echo "set_part {$(FPGA_PART)}" >> $(CSYNTH_TCL) 97 | @echo "create_clock -period $(CLOCK_PERIOD) -name default" >> $(CSYNTH_TCL) 98 | @echo "set_clock_uncertainty $(CLOCK_UNCERTAINTY)%" >> $(CSYNTH_TCL) 99 | @echo "csynth_design" >> $(CSYNTH_TCL) 100 | @echo "exit" >> $(CSYNTH_TCL) 101 | $(HLS) -f $(CSYNTH_TCL) 102 | endif 103 | 104 | # HLS C/RTL Co-simulation 105 | cosim: 106 | ifeq ($(COSIM), 1) 107 | @echo "Running HLS C/RTL Co-simulation..." 108 | @echo "open_project $(HLS_PROJECT)" > $(COSIM_TCL) 109 | @echo "set_top peakPicker" >> $(COSIM_TCL) 110 | @echo "add_files $(SRC_FILES)" >> $(COSIM_TCL) 111 | @echo "add_files -tb $(TB_FILES)" >> $(COSIM_TCL) 112 | @for file in $(TEST_DATA_FILES); do \ 113 | echo "add_files -tb $$file" >> $(COSIM_TCL); \ 114 | done 115 | @echo "open_solution $(HLS_SOLUTION)" >> $(COSIM_TCL) 116 | @echo "set_part {$(FPGA_PART)}" >> $(COSIM_TCL) 117 | @echo "create_clock -period $(CLOCK_PERIOD) -name default" >> $(COSIM_TCL) 118 | @echo "set_clock_uncertainty $(CLOCK_UNCERTAINTY)%" >> $(COSIM_TCL) 119 | @echo "cosim_design" >> $(COSIM_TCL) 120 | @echo "exit" >> $(COSIM_TCL) 121 | $(HLS) -f $(COSIM_TCL) 122 | endif 123 | 124 | # Export RTL as IP 125 | export_ip: 126 | ifeq ($(EXPORT_IP), 1) 127 | @echo "Exporting IP..." 128 | @echo "open_project $(HLS_PROJECT)" > $(EXPORT_TCL) 129 | @echo "set_top peakPicker" >> $(EXPORT_TCL) 130 | @echo "add_files $(SRC_FILES)" >> $(EXPORT_TCL) 131 | @echo "add_files -tb $(TB_FILES)" >> $(EXPORT_TCL) 132 | @for file in $(TEST_DATA_FILES); do \ 133 | echo "add_files -tb $$file" >> $(EXPORT_TCL); \ 134 | done 135 | @echo "open_solution $(HLS_SOLUTION)" >> $(EXPORT_TCL) 136 | @echo "set_part {$(FPGA_PART)}" >> $(EXPORT_TCL) 137 | @echo "create_clock -period $(CLOCK_PERIOD) -name default" >> $(EXPORT_TCL) 138 | @echo "set_clock_uncertainty $(CLOCK_UNCERTAINTY)%" >> $(EXPORT_TCL) 139 | @echo "export_design -format ip_catalog" >> $(EXPORT_TCL) 140 | @echo "exit" >> $(EXPORT_TCL) 141 | $(HLS) -f $(EXPORT_TCL) 142 | endif 143 | 144 | # Run Implementation in Vivado 145 | impl: 146 | ifeq ($(VIVADO_IMPL), 1) 147 | @echo "Running Vivado Implementation..." 148 | @echo "open_project $(HLS_PROJECT)" > $(IMPL_TCL) 149 | @echo "set_top peakPicker" >> $(IMPL_TCL) 150 | @echo "add_files $(SRC_FILES)" >> $(IMPL_TCL) 151 | @echo "add_files -tb $(TB_FILES)" >> $(IMPL_TCL) 152 | @for file in $(TEST_DATA_FILES); do \ 153 | echo "add_files -tb $$file" >> $(IMPL_TCL); \ 154 | done 155 | @echo "open_solution $(HLS_SOLUTION)" >> $(IMPL_TCL) 156 | @echo "set_part {$(FPGA_PART)}" >> $(IMPL_TCL) 157 | @echo "create_clock -period $(CLOCK_PERIOD) -name default" >> $(IMPL_TCL) 158 | @echo "set_clock_uncertainty $(CLOCK_UNCERTAINTY)%" >> $(IMPL_TCL) 159 | @echo "export_design -flow impl" >> $(IMPL_TCL) 160 | @echo "exit" >> $(IMPL_TCL) 161 | $(HLS) -f $(IMPL_TCL) 162 | endif 163 | 164 | # Clean up 165 | clean: 166 | @echo "Cleaning up..." 167 | rm -rf $(HLS_PROJECT) *.dat *.log *.tcl $(TB_EXE) *.o *.out 168 | 169 | # Help information 170 | help: 171 | @echo "Makefile for HLS Peak Picker Project" 172 | @echo "" 173 | @echo "Targets:" 174 | @echo " all : Run clean, testbench, C simulation, and C synthesis" 175 | @echo " tb : Build and run standalone testbench" 176 | @echo " csim : Run HLS C simulation" 177 | @echo " csynth : Run HLS C synthesis" 178 | @echo " cosim : Run HLS C/RTL co-simulation" 179 | @echo " export_ip : Export RTL as IP catalog" 180 | @echo " impl : Run implementation in Vivado" 181 | @echo " clean : Clean up generated files" 182 | @echo " help : Display this help information" 183 | @echo "" 184 | @echo "Configuration (set to 1 to enable, 0 to disable):" 185 | @echo " CSIM = $(CSIM) (C simulation)" 186 | @echo " CSYNTH = $(CSYNTH) (C synthesis)" 187 | @echo " COSIM = $(COSIM) (C/RTL co-simulation)" 188 | @echo " EXPORT_IP = $(EXPORT_IP) (Export IP)" 189 | @echo " VIVADO_IMPL= $(VIVADO_IMPL) (Vivado implementation)" 190 | @echo "" 191 | @echo "Hardware Configuration:" 192 | @echo " CLOCK_FREQ = $(CLOCK_FREQ)MHz (Clock frequency)" 193 | @echo " CLOCK_PERIOD = $(CLOCK_PERIOD)ns (Clock period)" 194 | @echo " FPGA_PART = $(FPGA_PART) (FPGA part)" 195 | @echo " CLOCK_UNCERTAINTY = $(CLOCK_UNCERTAINTY)% (Clock uncertainty)" 196 | -------------------------------------------------------------------------------- /implementations/peakPicker/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for HLS Project 2 | 3 | # Set the design name 4 | DESIGN_NAME = peakPicker 5 | 6 | # Configuration variables 7 | CSIM = 1 8 | CSYNTH = 1 9 | COSIM = 1 10 | EXPORT_IP = 1 11 | VIVADO_IMPL = 1 12 | 13 | # Hardware configuration 14 | CLOCK_FREQ = 256 15 | FPGA_PART = xc7k410t-ffg900-2 16 | CLOCK_UNCERTAINTY = 12.5 17 | 18 | # Vitis HLS installation path - modify this to match your installation 19 | VITIS_HLS_PATH ?= /opt/Xilinx/Vitis_HLS/2023.2 20 | # HLS compiler and flags - use full path to vitis_hls executable 21 | HLS = $(VITIS_HLS_PATH)/bin/vitis_hls 22 | 23 | # You can also set VITIS_HLS_PATH via environment variable before running make: 24 | # export VITIS_HLS_PATH=/path/to/your/Vitis_HLS/installation 25 | 26 | # Optional: Uncomment to source Vitis HLS settings before each HLS command 27 | # HLS = source $(VITIS_HLS_PATH)/settings64.sh && $(VITIS_HLS_PATH)/bin/vitis_hls 28 | 29 | HLS_PROJECT = proj_$(DESIGN_NAME) 30 | HLS_SOLUTION = solution1 31 | 32 | # C++ compiler and flags for testbench 33 | CXX = g++ 34 | CXXFLAGS = -Wall -Wextra -O2 -std=c++17 35 | INCLUDES = -I$(XILINX_HLS)/include 36 | 37 | # Source files 38 | SRC_FILES = $(DESIGN_NAME).cpp 39 | TB_FILES = $(DESIGN_NAME)_tb.cpp 40 | TEST_DATA_DIR = ../../data 41 | # Get a list of all text files in the data directory 42 | TEST_DATA_FILES := $(wildcard $(TEST_DATA_DIR)/*.txt) 43 | 44 | # Target names 45 | TB_EXE = $(DESIGN_NAME)_test 46 | CSIM_TCL = csim.tcl 47 | CSYNTH_TCL = csynth.tcl 48 | COSIM_TCL = cosim.tcl 49 | EXPORT_TCL = export.tcl 50 | IMPL_TCL = impl.tcl 51 | 52 | # Calculate clock period in ns from MHz 53 | CLOCK_PERIOD := $(shell echo "scale=2; 1000 / $(CLOCK_FREQ)" | bc) 54 | 55 | .PHONY: all clean tb csim csynth cosim export_ip impl help 56 | 57 | all: clean tb csim csynth 58 | 59 | # Standalone testbench using GCC 60 | tb: 61 | @echo "Building standalone testbench..." 62 | $(CXX) $(CXXFLAGS) $(INCLUDES) $(SRC_FILES) $(TB_FILES) -o $(TB_EXE) 63 | @echo "Running standalone testbench..." 64 | ./$(TB_EXE) 65 | 66 | # HLS C Simulation 67 | csim: 68 | ifeq ($(CSIM), 1) 69 | @echo "Running HLS C Simulation..." 70 | @echo "open_project $(HLS_PROJECT)" > $(CSIM_TCL) 71 | @echo "set_top peakPicker" >> $(CSIM_TCL) 72 | @echo "add_files $(SRC_FILES)" >> $(CSIM_TCL) 73 | @echo "add_files -tb $(TB_FILES)" >> $(CSIM_TCL) 74 | @for file in $(TEST_DATA_FILES); do \ 75 | echo "add_files -tb $$file" >> $(CSIM_TCL); \ 76 | done 77 | @echo "open_solution $(HLS_SOLUTION)" >> $(CSIM_TCL) 78 | @echo "set_part {$(FPGA_PART)}" >> $(CSIM_TCL) 79 | @echo "csim_design" >> $(CSIM_TCL) 80 | @echo "exit" >> $(CSIM_TCL) 81 | $(HLS) -f $(CSIM_TCL) 82 | endif 83 | 84 | # HLS C Synthesis 85 | csynth: 86 | ifeq ($(CSYNTH), 1) 87 | @echo "Running HLS C Synthesis..." 88 | @echo "open_project $(HLS_PROJECT)" > $(CSYNTH_TCL) 89 | @echo "set_top peakPicker" >> $(CSYNTH_TCL) 90 | @echo "add_files $(SRC_FILES)" >> $(CSYNTH_TCL) 91 | @echo "add_files -tb $(TB_FILES)" >> $(CSYNTH_TCL) 92 | @for file in $(TEST_DATA_FILES); do \ 93 | echo "add_files -tb $$file" >> $(CSYNTH_TCL); \ 94 | done 95 | @echo "open_solution $(HLS_SOLUTION)" >> $(CSYNTH_TCL) 96 | @echo "set_part {$(FPGA_PART)}" >> $(CSYNTH_TCL) 97 | @echo "create_clock -period $(CLOCK_PERIOD) -name default" >> $(CSYNTH_TCL) 98 | @echo "set_clock_uncertainty $(CLOCK_UNCERTAINTY)%" >> $(CSYNTH_TCL) 99 | @echo "csynth_design" >> $(CSYNTH_TCL) 100 | @echo "exit" >> $(CSYNTH_TCL) 101 | $(HLS) -f $(CSYNTH_TCL) 102 | endif 103 | 104 | # HLS C/RTL Co-simulation 105 | cosim: 106 | ifeq ($(COSIM), 1) 107 | @echo "Running HLS C/RTL Co-simulation..." 108 | @echo "open_project $(HLS_PROJECT)" > $(COSIM_TCL) 109 | @echo "set_top peakPicker" >> $(COSIM_TCL) 110 | @echo "add_files $(SRC_FILES)" >> $(COSIM_TCL) 111 | @echo "add_files -tb $(TB_FILES)" >> $(COSIM_TCL) 112 | @for file in $(TEST_DATA_FILES); do \ 113 | echo "add_files -tb $$file" >> $(COSIM_TCL); \ 114 | done 115 | @echo "open_solution $(HLS_SOLUTION)" >> $(COSIM_TCL) 116 | @echo "set_part {$(FPGA_PART)}" >> $(COSIM_TCL) 117 | @echo "create_clock -period $(CLOCK_PERIOD) -name default" >> $(COSIM_TCL) 118 | @echo "set_clock_uncertainty $(CLOCK_UNCERTAINTY)%" >> $(COSIM_TCL) 119 | @echo "cosim_design" >> $(COSIM_TCL) 120 | @echo "exit" >> $(COSIM_TCL) 121 | $(HLS) -f $(COSIM_TCL) 122 | endif 123 | 124 | # Export RTL as IP 125 | export_ip: 126 | ifeq ($(EXPORT_IP), 1) 127 | @echo "Exporting IP..." 128 | @echo "open_project $(HLS_PROJECT)" > $(EXPORT_TCL) 129 | @echo "set_top peakPicker" >> $(EXPORT_TCL) 130 | @echo "add_files $(SRC_FILES)" >> $(EXPORT_TCL) 131 | @echo "add_files -tb $(TB_FILES)" >> $(EXPORT_TCL) 132 | @for file in $(TEST_DATA_FILES); do \ 133 | echo "add_files -tb $$file" >> $(EXPORT_TCL); \ 134 | done 135 | @echo "open_solution $(HLS_SOLUTION)" >> $(EXPORT_TCL) 136 | @echo "set_part {$(FPGA_PART)}" >> $(EXPORT_TCL) 137 | @echo "create_clock -period $(CLOCK_PERIOD) -name default" >> $(EXPORT_TCL) 138 | @echo "set_clock_uncertainty $(CLOCK_UNCERTAINTY)%" >> $(EXPORT_TCL) 139 | @echo "export_design -format ip_catalog" >> $(EXPORT_TCL) 140 | @echo "exit" >> $(EXPORT_TCL) 141 | $(HLS) -f $(EXPORT_TCL) 142 | endif 143 | 144 | # Run Implementation in Vivado 145 | impl: 146 | ifeq ($(VIVADO_IMPL), 1) 147 | @echo "Running Vivado Implementation..." 148 | @echo "open_project $(HLS_PROJECT)" > $(IMPL_TCL) 149 | @echo "set_top peakPicker" >> $(IMPL_TCL) 150 | @echo "add_files $(SRC_FILES)" >> $(IMPL_TCL) 151 | @echo "add_files -tb $(TB_FILES)" >> $(IMPL_TCL) 152 | @for file in $(TEST_DATA_FILES); do \ 153 | echo "add_files -tb $$file" >> $(IMPL_TCL); \ 154 | done 155 | @echo "open_solution $(HLS_SOLUTION)" >> $(IMPL_TCL) 156 | @echo "set_part {$(FPGA_PART)}" >> $(IMPL_TCL) 157 | @echo "create_clock -period $(CLOCK_PERIOD) -name default" >> $(IMPL_TCL) 158 | @echo "set_clock_uncertainty $(CLOCK_UNCERTAINTY)%" >> $(IMPL_TCL) 159 | @echo "export_design -flow impl" >> $(IMPL_TCL) 160 | @echo "exit" >> $(IMPL_TCL) 161 | $(HLS) -f $(IMPL_TCL) 162 | endif 163 | 164 | # Clean up 165 | clean: 166 | @echo "Cleaning up..." 167 | rm -rf $(HLS_PROJECT) *.dat *.log *.tcl $(TB_EXE) *.o *.out 168 | 169 | # Help information 170 | help: 171 | @echo "Makefile for HLS Peak Picker Project" 172 | @echo "" 173 | @echo "Targets:" 174 | @echo " all : Run clean, testbench, C simulation, and C synthesis" 175 | @echo " tb : Build and run standalone testbench" 176 | @echo " csim : Run HLS C simulation" 177 | @echo " csynth : Run HLS C synthesis" 178 | @echo " cosim : Run HLS C/RTL co-simulation" 179 | @echo " export_ip : Export RTL as IP catalog" 180 | @echo " impl : Run implementation in Vivado" 181 | @echo " clean : Clean up generated files" 182 | @echo " help : Display this help information" 183 | @echo "" 184 | @echo "Configuration (set to 1 to enable, 0 to disable):" 185 | @echo " CSIM = $(CSIM) (C simulation)" 186 | @echo " CSYNTH = $(CSYNTH) (C synthesis)" 187 | @echo " COSIM = $(COSIM) (C/RTL co-simulation)" 188 | @echo " EXPORT_IP = $(EXPORT_IP) (Export IP)" 189 | @echo " VIVADO_IMPL= $(VIVADO_IMPL) (Vivado implementation)" 190 | @echo "" 191 | @echo "Hardware Configuration:" 192 | @echo " CLOCK_FREQ = $(CLOCK_FREQ)MHz (Clock frequency)" 193 | @echo " CLOCK_PERIOD = $(CLOCK_PERIOD)ns (Clock period)" 194 | @echo " FPGA_PART = $(FPGA_PART) (FPGA part)" 195 | @echo " CLOCK_UNCERTAINTY = $(CLOCK_UNCERTAINTY)% (Clock uncertainty)" 196 | -------------------------------------------------------------------------------- /prompts/readme_generation.md: -------------------------------------------------------------------------------- 1 | # README Generation Prompt 2 | 3 | ## Context 4 | You are tasked with creating a comprehensive README document for an FPGA hardware accelerator design. This document will be the primary reference for users, developers, and stakeholders who interact with this hardware component. 5 | 6 | ## Component Overview 7 | - **Component Name**: {component_name} 8 | - **Design Purpose**: A hardware accelerator implemented on an FPGA 9 | - **Generation Method**: AI-assisted design using LLM ({generation_model}) 10 | - **Target Platform**: Xilinx FPGA ({fpga_part}) 11 | 12 | ## Instructions 13 | 14 | Create a comprehensive README.md file following this structure: 15 | 16 | ### 1. Title and Introduction 17 | - Clear title with component name 18 | - Brief overview of what the component does 19 | - Key features and capabilities 20 | - Target applications 21 | 22 | ### 2. Hardware Architecture 23 | - High-level block diagram description 24 | - Key architectural components 25 | - Data flow explanation 26 | - Interface specifications 27 | - Include design decisions and their rationales 28 | 29 | **Architecture Visualization**: Include a Mermaid flowchart diagram showing the main components and data flow. Example: 30 | 31 | ```mermaid 32 | flowchart TD 33 | A["Input Interface"] --> B["Core Processing Logic"] 34 | B --> C["Output Interface"] 35 | B --> D["Control Unit"] 36 | E["Memory"] <--> B 37 | ``` 38 | 39 | ### 3. Implementation Details 40 | - HLS directives and optimizations used 41 | - Resource utilization (LUTs, FFs, DSPs, BRAMs) 42 | - Critical design parameters 43 | - Key algorithms and their hardware mapping 44 | 45 | **Algorithm Visualization**: Include a Mermaid flowchart or sequence diagram showing the algorithm implementation. Example of algorithm flowchart: 46 | 47 | ```mermaid 48 | flowchart LR 49 | A["Input Data"] --> B["Stage 1: Preprocessing"] 50 | B --> C["Stage 2: Computation"] 51 | C --> D["Stage 3: Postprocessing"] 52 | D --> E["Output Result"] 53 | 54 | subgraph "Core Algorithm" 55 | B 56 | C 57 | D 58 | end 59 | ``` 60 | 61 | ### 4. Performance Metrics 62 | - Latency (in cycles) 63 | - Throughput 64 | - Clock frequency 65 | - Resource efficiency 66 | - Comparative analysis against baseline if available 67 | 68 | **Performance Visualization**: Present performance metrics in clear tables and include a state diagram if applicable. Example: 69 | 70 | | Metric | Value | Unit | 71 | |----------------|----------|------------| 72 | | Latency | X | cycles | 73 | | Clock Period | Y | ns | 74 | | Throughput | Z | items/cycle| 75 | | Resource Usage | See table below | | 76 | 77 | ## Resource Utilization 78 | 79 | | Resource | Utilization | Available | Utilization % | 80 | |----------|-------------|-----------|---------------| 81 | | LUT | X | X_total | X_percent | 82 | | FF | Y | Y_total | Y_percent | 83 | | DSP | Z | Z_total | Z_percent | 84 | | BRAM | W | W_total | W_percent | 85 | 86 | ## Timing 87 | 88 | | Implementation | Target (ns) | Target (MHz) | Post-Synthesis (ns) | Post-Synthesis (MHz) | Post-Route (ns) | Post-Route (MHz) | 89 | |---------------|------------|-------------|-------------------|---------------------|----------------|----------------| 90 | | solution1 | 3.90 | 256.00 | u.uu | vvv.vv | x.xx | yyy.yy | 91 | 92 | ## Latency 93 | 94 | | Implementation | Min (cycles) | Max (cycles) | Average (cycles) | Throughput (samples/cycle) | 95 | |---------------|-------------|-------------|-----------------|-----------------------------| 96 | | solution1 | X | Y | - | - | 97 | 98 | 99 | For state machines, use: 100 | 101 | ```mermaid 102 | stateDiagram-v2 103 | [*] --> Idle 104 | Idle --> Processing: "start_signal" 105 | Processing --> Done: "processing_complete" 106 | Done --> Idle: "reset" 107 | ``` 108 | 109 | ### 5. Setup and Usage 110 | - Prerequisites (tools, versions) 111 | - Build instructions 112 | - Integration guidance 113 | - Testbench explanation 114 | - Common usage patterns 115 | - API documentation if applicable 116 | 117 | **Setup Visualization**: If applicable, include a sequence diagram showing the setup and usage flow: 118 | 119 | ```mermaid 120 | sequenceDiagram 121 | participant User 122 | participant Build System 123 | participant FPGA 124 | 125 | User->>Build System: "Run build script" 126 | Build System->>FPGA: "Generate bitstream" 127 | FPGA-->>Build System: "Bitstream ready" 128 | Build System-->>User: "Build complete" 129 | User->>FPGA: "Load design" 130 | User->>FPGA: "Send data" 131 | FPGA-->>User: "Return results" 132 | ``` 133 | 134 | ### 6. Results and Validation 135 | - Verification methodology 136 | - Simulation results 137 | - Hardware testing results if available 138 | - Performance validation 139 | 140 | **Results Visualization**: Present validation results in tables and comparison charts where applicable. 141 | 142 | ### 7. Development History 143 | - Design evolution 144 | - Challenges encountered and their solutions 145 | - Optimization iterations 146 | - AI assistance insights 147 | 148 | ### 8. Future Work 149 | - Potential improvements 150 | - Scaling opportunities 151 | - Additional features 152 | 153 | ## Source Information 154 | Use the following source files and metrics to inform your documentation: 155 | 156 | ### Source Code 157 | ```cpp 158 | // Header file ({component_name}.hpp) 159 | {header_code} 160 | ``` 161 | 162 | ```cpp 163 | // Implementation file ({component_name}.cpp) 164 | {implementation_code} 165 | ``` 166 | 167 | ```cpp 168 | // Testbench file ({component_name}_tb.cpp) 169 | {testbench_code} 170 | ``` 171 | 172 | ### Performance Metrics 173 | {performance_metrics} 174 | 175 | ### Implementation Challenges 176 | {errors_encountered} 177 | 178 | ### Debugging Methods 179 | {debugging_methods} 180 | 181 | ## Diagram Examples 182 | The following are examples of different types of Mermaid diagrams you can use: 183 | 184 | {diagram_examples} 185 | 186 | ## Chart Examples 187 | The following are examples of different types of tables/charts for performance data: 188 | 189 | {chart_examples} 190 | 191 | ## Style Guidelines 192 | - Use clear, technical language appropriate for engineering documentation 193 | - Include code snippets where helpful 194 | - Use markdown formatting features (headers, lists, tables, code blocks) 195 | - Be concise but comprehensive 196 | - Focus on practical usage and technical details 197 | - Highlight AI-assisted aspects of the development process 198 | - Maintain a professional tone 199 | - Make effective use of diagrams and visualizations for clarity 200 | - Use Mermaid diagrams for architecture, data flow, and algorithms 201 | - Use tables to present performance metrics and comparative analysis 202 | 203 | Your README should serve as both a technical reference and a guide for someone who wants to understand, use, or modify the hardware component. 204 | 205 | ## BEST PRACTICES 206 | 207 | - Successfully generated documentation on 2025-04-06 208 | - Successfully generated documentation on 2025-04-06 209 | - Successfully generated documentation on 2025-04-06 210 | - Successfully generated documentation on 2025-04-06 211 | - Successfully generated documentation on 2025-04-06 212 | - Successfully generated documentation on 2025-04-06 213 | - Successfully generated documentation on 2025-04-06 214 | - Successfully generated documentation on 2025-04-06 215 | - Successfully generated documentation on 2025-04-06 216 | -------------------------------------------------------------------------------- /prompts/paper_generation.md: -------------------------------------------------------------------------------- 1 | # Academic Paper Generation Prompt 2 | 3 | ## Context 4 | You are tasked with writing an academic research paper about an FPGA hardware accelerator design that was developed using an AI-assisted design methodology. This paper should follow academic standards and contribute to the literature on hardware acceleration and AI-assisted design. 5 | 6 | ## Component Information 7 | - **Component Name**: {component_name} 8 | - **LLM Used for Generation**: {generation_model} 9 | - **Target FPGA Platform**: {fpga_part} 10 | - **Domain**: Hardware Acceleration for Digital Signal Processing/Machine Learning/etc. 11 | 12 | ## Paper Structure Requirements 13 | 14 | Create a complete academic paper in markdown format with the following structure: 15 | 16 | ### 1. Title and Authors 17 | - Create an appropriate academic title for this work 18 | - List authors as the research team (placeholder) 19 | - Include institutional affiliation 20 | 21 | ### 2. Abstract (200-250 words) 22 | - Summarize the paper's content 23 | - State the problem addressed 24 | - Describe the approach using AI-assisted design 25 | - Highlight key results and contributions 26 | - Mention broader impact 27 | 28 | ### 3. Introduction 29 | - Context and background of the problem 30 | - Motivation for hardware acceleration 31 | - Challenges in traditional FPGA design 32 | - Introduction to AI-assisted hardware design 33 | - Contribution statement 34 | - Paper organization 35 | 36 | ### 4. Related Work (2-3 subsections) 37 | - Prior work on hardware acceleration for similar applications 38 | - Previous research on automated HLS design 39 | - AI-assisted hardware design methodologies 40 | - Positioning of current work within literature 41 | 42 | ### 5. Methodology 43 | - Overall design approach 44 | - AI-assisted design workflow description 45 | - Prompt engineering for hardware generation 46 | - Iteration and refinement process 47 | - Verification methodology 48 | 49 | **Workflow Visualization**: Include a Mermaid diagram showing the AI-assisted design workflow. Example: 50 | 51 | ```mermaid 52 | flowchart TD 53 | A["Problem Definition"] --> B["Prompt Engineering"] 54 | B --> C["LLM Code Generation"] 55 | C --> D["Code Verification"] 56 | D -->|"Errors"| E["Debugging"] 57 | E --> C 58 | D -->|"Success"| F["Implementation"] 59 | F --> G["Performance Analysis"] 60 | G -->|"Optimization Needed"| H["Optimization Prompts"] 61 | H --> C 62 | G -->|"Acceptable"| I["Final Design"] 63 | ``` 64 | 65 | ### 6. Design Architecture 66 | - System-level architecture 67 | - Component interfaces and data flow 68 | - Key algorithmic components 69 | - Design constraints and considerations 70 | - HLS implementation details 71 | - Optimizations applied 72 | 73 | **Architecture Visualization**: Include a detailed Mermaid diagram showing the system architecture and data flow. Example: 74 | 75 | ```mermaid 76 | flowchart LR 77 | A["External Input"] --> B["Input Interface"] 78 | B --> C["Processing Module"] 79 | 80 | subgraph "Core Accelerator" 81 | C --> D["Algorithm Stage 1"] 82 | D --> E["Algorithm Stage 2"] 83 | E --> F["Algorithm Stage 3"] 84 | end 85 | 86 | F --> G["Output Interface"] 87 | G --> H["External Output"] 88 | 89 | I["Control Logic"] --> C 90 | I --> D 91 | I --> E 92 | I --> F 93 | ``` 94 | 95 | ### 7. Implementation 96 | - HLS directives and pragmas 97 | - Resource allocation strategies 98 | - Pipeline and parallelism exploitations 99 | - Memory architecture and data movement 100 | - Critical path analysis 101 | 102 | **Implementation Visualization**: Include a Mermaid diagram showing key optimization strategies or pipeline structure. Example: 103 | 104 | ```mermaid 105 | gantt 106 | title Pipeline Structure 107 | dateFormat s 108 | axisFormat %S 109 | 110 | section Without Pipelining 111 | Stage 1 :a1, 0, 3s 112 | Stage 2 :a2, after a1, 2s 113 | Stage 3 :a3, after a2, 2s 114 | 115 | section With Pipelining 116 | Stage 1 (Iter 1) :b1, 0, 3s 117 | Stage 2 (Iter 1) :b2, after b1, 2s 118 | Stage 1 (Iter 2) :b3, after b1, 3s 119 | Stage 3 (Iter 1) :b4, after b2, 2s 120 | Stage 2 (Iter 2) :b5, after b3, 2s 121 | ``` 122 | 123 | ### 8. Experimental Results 124 | - Experimental setup 125 | - Performance metrics: 126 | - Resource utilization 127 | - Timing and frequency 128 | - Latency and throughput 129 | - Power consumption (if available) 130 | - Comparative analysis with: 131 | - Manual implementations 132 | - Other automated approaches 133 | - Software-only solutions 134 | - Discussion of results 135 | 136 | **Results Visualization**: Present results in clear tables and comparison charts: 137 | 138 | ``` 139 | | Implementation | LUTs | FFs | DSPs | BRAMs | Clock Freq (MHz) | Latency (cycles) | 140 | |----------------|------|-----|------|-------|------------------|------------------| 141 | | Our Work | X | Y | Z | W | F | L | 142 | | Baseline 1 | X1 | Y1 | Z1 | W1 | F1 | L1 | 143 | | Baseline 2 | X2 | Y2 | Z2 | W2 | F2 | L2 | 144 | ``` 145 | 146 | Consider including Mermaid charts for comparative analysis: 147 | 148 | ```mermaid 149 | pie title Resource Distribution 150 | "LUTs" : X 151 | "FFs" : Y 152 | "DSPs" : Z 153 | "BRAMs" : W 154 | ``` 155 | 156 | ### 9. Analysis of AI-Assisted Design Process 157 | - Analysis of LLM strengths/weaknesses in hardware design 158 | - Error patterns and resolution strategies 159 | - Human-AI collaboration insights 160 | - Design quality assessment 161 | - Development efficiency metrics 162 | 163 | **Process Visualization**: Show the iteration process and error resolution: 164 | 165 | ```mermaid 166 | stateDiagram-v2 167 | [*] --> Prompt 168 | Prompt --> Generation 169 | Generation --> Verification 170 | Verification --> Success 171 | Verification --> Errors 172 | Errors --> Analysis 173 | Analysis --> Refinement 174 | Refinement --> Prompt 175 | Success --> [*] 176 | ``` 177 | 178 | ### 10. Discussion 179 | - Interpretation of results 180 | - Limitations of the approach 181 | - Generalizability of the methodology 182 | - Trade-offs identified 183 | - Lessons learned 184 | 185 | ### 11. Future Work 186 | - Potential improvements 187 | - Broader applications 188 | - Research directions 189 | - Scaling to more complex designs 190 | 191 | ### 12. Conclusion 192 | - Summary of contributions 193 | - Key findings 194 | - Broader impact 195 | - Closing thoughts 196 | 197 | ### 13. References 198 | - Include relevant references to: 199 | - Hardware acceleration literature 200 | - High-Level Synthesis research 201 | - AI in design automation 202 | - Relevant applications 203 | - Methodological papers 204 | 205 | ## Source Information 206 | Use the following source information to inform your paper: 207 | 208 | ### Source Code Architecture 209 | ```cpp 210 | // Header file ({component_name}.hpp) 211 | {header_code} 212 | ``` 213 | 214 | ```cpp 215 | // Implementation file ({component_name}.cpp) 216 | {implementation_code} 217 | ``` 218 | 219 | ### Performance Metrics 220 | {performance_metrics} 221 | 222 | ### Design Process 223 | - AI Generation Model: {generation_model} 224 | - Workflow Steps: {workflow_steps} 225 | - Successful Steps: {successful_steps} 226 | - Error Steps: {error_steps} 227 | 228 | ### Implementation Challenges 229 | {errors_encountered} 230 | 231 | ### Debugging Methods 232 | {debugging_methods} 233 | 234 | ## Diagram Examples 235 | The following are examples of different types of Mermaid diagrams you can use: 236 | 237 | {diagram_examples} 238 | 239 | ## Chart Examples 240 | The following are examples of different types of tables/charts for performance data: 241 | 242 | {chart_examples} 243 | 244 | ## Academic Style Guidelines 245 | - Use formal academic language 246 | - Present objective analysis of results 247 | - Support claims with data from implementation 248 | - Discuss limitations honestly 249 | - Position work in relation to existing literature 250 | - Use the third person (avoid "I", "we", "you") 251 | - Maintain scientific rigor throughout 252 | - Use passive voice where appropriate 253 | - Include a balanced mix of technical details and higher-level discussion 254 | - Present results visually through diagrams, charts and tables 255 | - Use Mermaid diagrams for architecture, methodology, and algorithmic visualizations 256 | - Present performance results in tables with comparative analysis 257 | 258 | Your paper should contribute to the academic discourse on AI-assisted hardware design while presenting concrete technical achievements and insights. 259 | 260 | ## BEST PRACTICES 261 | - Successfully generated documentation on 2025-04-06 262 | -------------------------------------------------------------------------------- /implementations/peakPicker/peakPicker_tb.cpp: -------------------------------------------------------------------------------- 1 | /* AUTO-EDITED BY DEBUG ASSISTANT */ 2 | #include "peakPicker.hpp" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include // For std::abs 8 | #include // For numeric_limits 9 | #include // For std::setprecision 10 | 11 | // Define input/output file names 12 | const std::string XCORR_INPUT_FILE = "pssCorrMagSq_3_in.txt"; // Matches MATLAB TB 13 | const std::string THRESHOLD_INPUT_FILE = "threshold_in.txt"; // Matches MATLAB TB 14 | const std::string REF_OUTPUT_FILE = "locations_3_ref.txt"; // Matches MATLAB TB 15 | // const std::string REF_OUTPUT_FILE = "peakLocs_out.txt"; // Alternative reference file name 16 | 17 | // Function to read data from a file into a vector of doubles 18 | bool readDataFile(const std::string& filename, std::vector& data) { 19 | std::ifstream infile(filename); 20 | if (!infile.is_open()) { 21 | std::cerr << "Error: Could not open file: " << filename << std::endl; 22 | return false; 23 | } 24 | double value; 25 | while (infile >> value) { 26 | data.push_back(value); 27 | } 28 | // Check for read errors (e.g., non-numeric data) after the loop 29 | if (infile.bad()) { 30 | std::cerr << "Error: Failed reading data from file: " << filename << std::endl; 31 | infile.close(); 32 | return false; 33 | } 34 | // Check if EOF was reached OR if a formatting error stopped the loop early 35 | // infile.fail() is true if >> failed (e.g., bad format), but not for EOF 36 | // infile.eof() is true if >> tried to read past EOF 37 | if (!infile.eof() && infile.fail()) { 38 | std::cerr << "Warning: Input format error suspected in file: " << filename << std::endl; 39 | // Continue, but be aware data might be incomplete 40 | } 41 | infile.close(); 42 | if (data.empty() && !infile.eof()) { // Check if file was opened but no data read 43 | std::cerr << "Warning: No data read from file (or file empty): " << filename << std::endl; 44 | } 45 | std::cout << "Read " << data.size() << " values from " << filename << std::endl; 46 | return true; 47 | } 48 | 49 | // Function to read integer data (locations) from a file 50 | bool readIntDataFile(const std::string& filename, std::vector& data) { 51 | std::ifstream infile(filename); 52 | if (!infile.is_open()) { 53 | std::cerr << "Error: Could not open file: " << filename << std::endl; 54 | return false; 55 | } 56 | int value; 57 | while (infile >> value) { 58 | data.push_back(value); 59 | } 60 | // Check for read errors (e.g., non-numeric data) after the loop 61 | if (infile.bad()) { 62 | std::cerr << "Error: Failed reading data from file: " << filename << std::endl; 63 | infile.close(); 64 | return false; 65 | } 66 | // Check if EOF was reached OR if a formatting error stopped the loop early 67 | if (!infile.eof() && infile.fail()) { 68 | std::cerr << "Warning: Input format error suspected in file: " << filename << std::endl; 69 | // Continue, but be aware data might be incomplete 70 | } 71 | infile.close(); 72 | if (data.empty() && !infile.eof()) { // Check if file was opened but no data read 73 | std::cerr << "Warning: No data read from file (or file empty): " << filename << std::endl; 74 | } 75 | std::cout << "Read " << data.size() << " values from " << filename << std::endl; 76 | return true; 77 | } 78 | 79 | 80 | int main() { 81 | std::cout << "--- Starting Peak Picker Testbench ---" << std::endl; 82 | 83 | // --- Data Loading --- 84 | std::vector xcorrVec, thresholdVec; 85 | std::vector refLocsVec; // Use int for reference locations from file 86 | 87 | std::cout << "Loading input data..." << std::endl; 88 | if (!readDataFile(XCORR_INPUT_FILE, xcorrVec)) return 1; 89 | if (!readDataFile(THRESHOLD_INPUT_FILE, thresholdVec)) return 1; 90 | 91 | std::cout << "Loading reference output data..." << std::endl; 92 | // Assuming reference file contains 1-based indices from MATLAB 93 | if (!readIntDataFile(REF_OUTPUT_FILE, refLocsVec)) return 1; 94 | 95 | // Basic input validation 96 | if (xcorrVec.size() != thresholdVec.size()) { 97 | std::cerr << "Error: Input xcorr size (" << xcorrVec.size() 98 | << ") does not match threshold size (" << thresholdVec.size() << ")" << std::endl; 99 | return 1; 100 | } 101 | if (xcorrVec.empty()) { 102 | std::cerr << "Error: Input data vectors are empty (or failed to load)." << std::endl; 103 | return 1; 104 | } 105 | 106 | int numSamples = xcorrVec.size(); 107 | std::cout << "Number of samples to process: " << numSamples << std::endl; 108 | 109 | // --- Stream Preparation --- 110 | hls::stream xcorrStream("xcorrStream"); 111 | hls::stream thresholdStream("thresholdStream"); 112 | hls::stream locationStream("locationStream"); 113 | 114 | std::cout << "Populating input streams..." << std::endl; 115 | for (int i = 0; i < numSamples; ++i) { 116 | // Convert double to fixed-point Data_t 117 | // Add checks here if concerned about out-of-range conversions, though 118 | // ap_fixed usually handles this via saturation or wrapping based on config. 119 | xcorrStream.write(static_cast(xcorrVec[i])); 120 | thresholdStream.write(static_cast(thresholdVec[i])); 121 | } 122 | std::cout << "Input streams populated." << std::endl; 123 | 124 | // --- Call the DUT (Device Under Test) --- 125 | std::cout << "Calling HLS peakPicker function..." << std::endl; 126 | peakPicker(xcorrStream, thresholdStream, locationStream, numSamples); 127 | std::cout << "HLS peakPicker function finished." << std::endl; 128 | 129 | // --- Collect Results --- 130 | std::vector actualLocsVec; // Store results from DUT (0-based) 131 | std::cout << "Collecting results from output stream..." << std::endl; 132 | while (!locationStream.empty()) { 133 | actualLocsVec.push_back(locationStream.read()); 134 | } 135 | std::cout << "Collected " << actualLocsVec.size() << " peak locations." << std::endl; 136 | 137 | // --- Verification --- 138 | std::cout << "Comparing HLS results with reference..." << std::endl; 139 | bool match = true; 140 | int errorCount = 0; 141 | 142 | // Compare number of peaks found 143 | if (actualLocsVec.size() != refLocsVec.size()) { 144 | std::cerr << "Error: Mismatch in number of detected peaks!" << std::endl; 145 | std::cerr << " Expected: " << refLocsVec.size() << std::endl; 146 | std::cerr << " Actual: " << actualLocsVec.size() << std::endl; 147 | match = false; 148 | // Don't stop here, try comparing the elements we do have if sizes are different 149 | // errorCount will increase significantly anyway. 150 | } else { 151 | std::cout << "Number of peaks matches reference (" << refLocsVec.size() << ")." << std::endl; 152 | } 153 | 154 | // Compare actual peak locations element by element 155 | size_t comparisonLimit = std::min(actualLocsVec.size(), refLocsVec.size()); 156 | for (size_t i = 0; i < comparisonLimit; ++i) { 157 | // Convert DUT output (ap_uint) to int for comparison 158 | int actualLoc = static_cast(actualLocsVec[i]); // DUT output (0-based) 159 | int refLoc = refLocsVec[i]; // Reference file value (assumed 1-based) 160 | 161 | // --- MODIFIED COMPARISON --- 162 | // Adjust the 1-based reference index to 0-based for comparison 163 | int expectedLoc_0based = refLoc - 1; 164 | 165 | if (actualLoc != expectedLoc_0based) { 166 | if (errorCount < 20) { // Print more mismatches if they occur 167 | std::cerr << "Mismatch at output index " << i << ":" << std::endl; 168 | std::cerr << " Expected (0-based): " << expectedLoc_0based << " (from ref file value " << refLoc << ")" << std::endl; 169 | std::cerr << " Actual (0-based): " << actualLoc << std::endl; 170 | } else if (errorCount == 20) { 171 | std::cerr << "Further mismatches suppressed..." << std::endl; 172 | } 173 | match = false; 174 | errorCount++; 175 | } 176 | // --- END MODIFIED COMPARISON --- 177 | } 178 | 179 | // Report if sizes mismatched even if no element mismatches were found within comparisonLimit 180 | if (actualLocsVec.size() != refLocsVec.size()) { 181 | match = false; // Ensure test fails if sizes differ 182 | if (errorCount == 0) { // Only print this if no element mismatches were logged 183 | std::cerr << "Mismatch due to differing number of peaks." << std::endl; 184 | } 185 | } 186 | 187 | if (errorCount > 0) { 188 | std::cerr << "Total mismatches found: " << errorCount << std::endl; 189 | } 190 | 191 | 192 | // --- Report Results --- 193 | if (match) { 194 | std::cout << "----------------------------------------" << std::endl; 195 | std::cout << "--- Test PASSED ---" << std::endl; 196 | std::cout << "HLS implementation output matches the reference output (assuming 1-based reference indices)." << std::endl; 197 | std::cout << "----------------------------------------" << std::endl; 198 | return 0; // Success 199 | } else { 200 | std::cout << "----------------------------------------" << std::endl; 201 | std::cout << "--- Test FAILED ---" << std::endl; 202 | std::cout << "HLS implementation output does NOT match the reference output." << std::endl; 203 | std::cout << "----------------------------------------" << std::endl; 204 | return 1; // Failure 205 | } 206 | } -------------------------------------------------------------------------------- /example/vitis_hls.log: -------------------------------------------------------------------------------- 1 | 2 | ****** Vitis HLS - High-Level Synthesis from C, C++ and OpenCL v2024.2.2 (64-bit) 3 | **** SW Build 6049644 on Mar 5 2025 4 | **** IP Build 6050500 on Thu Mar 6 23:33:39 MST 2025 5 | **** SharedData Build 6060542 on Thu Mar 06 10:31:07 MST 2025 6 | **** Start of session at: Fri Aug 1 14:42:38 2025 7 | ** Copyright 1986-2022 Xilinx, Inc. All Rights Reserved. 8 | ** Copyright 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved. 9 | 10 | source /opt/Xilinx/Vitis/2024.2/scripts/vitis_hls/hls.tcl -notrace 11 | INFO: [HLS 200-10] For user 'amd' on host 'amd' (Linux_x86_64 version 6.8.0-65-generic) on Fri Aug 01 14:42:39 AEST 2025 12 | INFO: [HLS 200-10] On os Ubuntu 22.04.5 LTS 13 | INFO: [HLS 200-10] In directory '/home/amd/UTS/llm-fpga-design/example' 14 | WARNING: [HLS 200-2053] The vitis_hls executable is deprecated. Consider using vitis-run --mode hls --tcl 15 | Sourcing Tcl script 'csynth.tcl' 16 | INFO: [HLS 200-1510] Running: open_project proj_peakPicker 17 | INFO: [HLS 200-10] Creating and opening project '/home/amd/UTS/llm-fpga-design/example/proj_peakPicker'. 18 | INFO: [HLS 200-1510] Running: set_top peakPicker 19 | INFO: [HLS 200-1510] Running: add_files peakPicker.cpp 20 | INFO: [HLS 200-10] Adding design file 'peakPicker.cpp' to the project 21 | INFO: [HLS 200-1510] Running: add_files -tb peakPicker_tb.cpp 22 | INFO: [HLS 200-10] Adding test bench file 'peakPicker_tb.cpp' to the project 23 | INFO: [HLS 200-1510] Running: add_files -tb ./locations_3_ref.txt 24 | INFO: [HLS 200-10] Adding test bench file './locations_3_ref.txt' to the project 25 | INFO: [HLS 200-1510] Running: add_files -tb ./peakLocs_out.txt 26 | INFO: [HLS 200-10] Adding test bench file './peakLocs_out.txt' to the project 27 | INFO: [HLS 200-1510] Running: add_files -tb ./pssCorrMagSq_3_in.txt 28 | INFO: [HLS 200-10] Adding test bench file './pssCorrMagSq_3_in.txt' to the project 29 | INFO: [HLS 200-1510] Running: add_files -tb ./threshold_in.txt 30 | INFO: [HLS 200-10] Adding test bench file './threshold_in.txt' to the project 31 | INFO: [HLS 200-1510] Running: open_solution solution1 32 | INFO: [HLS 200-10] Creating and opening solution '/home/amd/UTS/llm-fpga-design/example/proj_peakPicker/solution1'. 33 | INFO: [HLS 200-1505] Using default flow_target 'vivado' 34 | Resolution: For help on HLS 200-1505 see docs.xilinx.com/access/sources/dita/topic?Doc_Version=2024.2%20English&url=ug1448-hls-guidance&resourceid=200-1505.html 35 | INFO: [HLS 200-1510] Running: set_part xc7k410t-ffg900-2 36 | INFO: [HLS 200-1611] Setting target device to 'xc7k410t-ffg900-2' 37 | INFO: [HLS 200-1510] Running: create_clock -period 3.90 -name default 38 | INFO: [SYN 201-201] Setting up clock 'default' with a period of 3.9ns. 39 | INFO: [HLS 200-1510] Running: set_clock_uncertainty 12.5% 40 | INFO: [SYN 201-201] Setting up clock 'default' with an uncertainty of 0.487ns. 41 | INFO: [HLS 200-1510] Running: csynth_design 42 | INFO: [HLS 200-111] Finished File checks and directory preparation: CPU user time: 0.04 seconds. CPU system time: 0.01 seconds. Elapsed time: 0.04 seconds; current allocated memory: 640.586 MB. 43 | INFO: [HLS 200-10] Analyzing design file 'peakPicker.cpp' ... 44 | INFO: [HLS 200-111] Finished Source Code Analysis and Preprocessing: CPU user time: 1.67 seconds. CPU system time: 0.5 seconds. Elapsed time: 2.19 seconds; current allocated memory: 642.633 MB. 45 | INFO: [HLS 200-777] Using interface defaults for 'Vivado' flow target. 46 | INFO: [HLS 200-1995] There were 912 instructions in the design after the 'Compile/Link' phase of compilation. See the Design Size Report for more details: /home/amd/UTS/llm-fpga-design/example/proj_peakPicker/solution1/syn/report/csynth_design_size.rpt 47 | INFO: [HLS 200-1995] There were 298 instructions in the design after the 'Unroll/Inline (step 1)' phase of compilation. See the Design Size Report for more details: /home/amd/UTS/llm-fpga-design/example/proj_peakPicker/solution1/syn/report/csynth_design_size.rpt 48 | INFO: [HLS 200-1995] There were 148 instructions in the design after the 'Unroll/Inline (step 2)' phase of compilation. See the Design Size Report for more details: /home/amd/UTS/llm-fpga-design/example/proj_peakPicker/solution1/syn/report/csynth_design_size.rpt 49 | INFO: [HLS 200-1995] There were 146 instructions in the design after the 'Unroll/Inline (step 3)' phase of compilation. See the Design Size Report for more details: /home/amd/UTS/llm-fpga-design/example/proj_peakPicker/solution1/syn/report/csynth_design_size.rpt 50 | INFO: [HLS 200-1995] There were 146 instructions in the design after the 'Unroll/Inline (step 4)' phase of compilation. See the Design Size Report for more details: /home/amd/UTS/llm-fpga-design/example/proj_peakPicker/solution1/syn/report/csynth_design_size.rpt 51 | INFO: [HLS 200-1995] There were 68 instructions in the design after the 'Array/Struct (step 1)' phase of compilation. See the Design Size Report for more details: /home/amd/UTS/llm-fpga-design/example/proj_peakPicker/solution1/syn/report/csynth_design_size.rpt 52 | INFO: [HLS 200-1995] There were 68 instructions in the design after the 'Array/Struct (step 2)' phase of compilation. See the Design Size Report for more details: /home/amd/UTS/llm-fpga-design/example/proj_peakPicker/solution1/syn/report/csynth_design_size.rpt 53 | INFO: [HLS 200-1995] There were 68 instructions in the design after the 'Array/Struct (step 3)' phase of compilation. See the Design Size Report for more details: /home/amd/UTS/llm-fpga-design/example/proj_peakPicker/solution1/syn/report/csynth_design_size.rpt 54 | INFO: [HLS 200-1995] There were 68 instructions in the design after the 'Array/Struct (step 4)' phase of compilation. See the Design Size Report for more details: /home/amd/UTS/llm-fpga-design/example/proj_peakPicker/solution1/syn/report/csynth_design_size.rpt 55 | INFO: [HLS 200-1995] There were 70 instructions in the design after the 'Array/Struct (step 5)' phase of compilation. See the Design Size Report for more details: /home/amd/UTS/llm-fpga-design/example/proj_peakPicker/solution1/syn/report/csynth_design_size.rpt 56 | INFO: [HLS 200-1995] There were 70 instructions in the design after the 'Performance (step 1)' phase of compilation. See the Design Size Report for more details: /home/amd/UTS/llm-fpga-design/example/proj_peakPicker/solution1/syn/report/csynth_design_size.rpt 57 | INFO: [HLS 200-1995] There were 69 instructions in the design after the 'Performance (step 2)' phase of compilation. See the Design Size Report for more details: /home/amd/UTS/llm-fpga-design/example/proj_peakPicker/solution1/syn/report/csynth_design_size.rpt 58 | INFO: [HLS 200-1995] There were 68 instructions in the design after the 'Performance (step 3)' phase of compilation. See the Design Size Report for more details: /home/amd/UTS/llm-fpga-design/example/proj_peakPicker/solution1/syn/report/csynth_design_size.rpt 59 | INFO: [HLS 200-1995] There were 68 instructions in the design after the 'Performance (step 4)' phase of compilation. See the Design Size Report for more details: /home/amd/UTS/llm-fpga-design/example/proj_peakPicker/solution1/syn/report/csynth_design_size.rpt 60 | INFO: [HLS 200-1995] There were 77 instructions in the design after the 'HW Transforms (step 1)' phase of compilation. See the Design Size Report for more details: /home/amd/UTS/llm-fpga-design/example/proj_peakPicker/solution1/syn/report/csynth_design_size.rpt 61 | INFO: [HLS 200-1995] There were 79 instructions in the design after the 'HW Transforms (step 2)' phase of compilation. See the Design Size Report for more details: /home/amd/UTS/llm-fpga-design/example/proj_peakPicker/solution1/syn/report/csynth_design_size.rpt 62 | INFO: [HLS 214-186] Unrolling loop 'init_sr' (peakPicker.cpp:36:11) in function 'peakPicker_wrapper' completely with a factor of 11 (peakPicker.cpp:14:0) 63 | INFO: [HLS 200-111] Finished Compiling Optimization and Transform: CPU user time: 1.52 seconds. CPU system time: 0.44 seconds. Elapsed time: 6.66 seconds; current allocated memory: 652.320 MB. 64 | INFO: [HLS 200-111] Finished Checking Pragmas: CPU user time: 0 seconds. CPU system time: 0 seconds. Elapsed time: 0 seconds; current allocated memory: 652.320 MB. 65 | INFO: [HLS 200-10] Starting code transformations ... 66 | INFO: [HLS 200-111] Finished Standard Transforms: CPU user time: 0 seconds. CPU system time: 0 seconds. Elapsed time: 0.01 seconds; current allocated memory: 652.391 MB. 67 | INFO: [HLS 200-10] Checking synthesizability ... 68 | INFO: [HLS 200-111] Finished Checking Synthesizability: CPU user time: 0.01 seconds. CPU system time: 0.01 seconds. Elapsed time: 0 seconds; current allocated memory: 652.406 MB. 69 | INFO: [XFORM 203-11] Balancing expressions in function 'peakPicker_wrapper' (peakPicker.cpp:47:13)...11 expression(s) balanced. 70 | INFO: [HLS 200-111] Finished Loop, function and other optimizations: CPU user time: 0.01 seconds. CPU system time: 0 seconds. Elapsed time: 0.03 seconds; current allocated memory: 673.844 MB. 71 | INFO: [HLS 200-111] Finished Architecture Synthesis: CPU user time: 0.02 seconds. CPU system time: 0 seconds. Elapsed time: 0.01 seconds; current allocated memory: 674.102 MB. 72 | INFO: [HLS 200-10] Starting hardware synthesis ... 73 | INFO: [HLS 200-10] Synthesizing 'peakPicker' ... 74 | INFO: [HLS 200-10] ---------------------------------------------------------------- 75 | INFO: [HLS 200-42] -- Implementing module 'peakPicker_wrapper' 76 | INFO: [HLS 200-10] ---------------------------------------------------------------- 77 | INFO: [SCHED 204-11] Starting scheduling ... 78 | INFO: [SCHED 204-61] Pipelining loop 'ultra_main_loop'. 79 | INFO: [HLS 200-1470] Pipelining result : Target II = 1, Final II = 1, Depth = 4, loop 'ultra_main_loop' 80 | INFO: [SCHED 204-11] Finished scheduling. 81 | INFO: [HLS 200-111] Finished Scheduling: CPU user time: 0.04 seconds. CPU system time: 0.02 seconds. Elapsed time: 0.06 seconds; current allocated memory: 675.617 MB. 82 | INFO: [BIND 205-100] Starting micro-architecture generation ... 83 | INFO: [BIND 205-101] Performing variable lifetime analysis. 84 | INFO: [BIND 205-101] Exploring resource sharing. 85 | INFO: [BIND 205-101] Binding ... 86 | INFO: [BIND 205-100] Finished micro-architecture generation. 87 | INFO: [HLS 200-111] Finished Binding: CPU user time: 0.01 seconds. CPU system time: 0 seconds. Elapsed time: 0.02 seconds; current allocated memory: 675.617 MB. 88 | INFO: [HLS 200-10] ---------------------------------------------------------------- 89 | INFO: [HLS 200-42] -- Implementing module 'peakPicker' 90 | INFO: [HLS 200-10] ---------------------------------------------------------------- 91 | INFO: [SCHED 204-11] Starting scheduling ... 92 | INFO: [SCHED 204-11] Finished scheduling. 93 | INFO: [HLS 200-111] Finished Scheduling: CPU user time: 0.02 seconds. CPU system time: 0.01 seconds. Elapsed time: 0.02 seconds; current allocated memory: 675.617 MB. 94 | INFO: [BIND 205-100] Starting micro-architecture generation ... 95 | INFO: [BIND 205-101] Performing variable lifetime analysis. 96 | INFO: [BIND 205-101] Exploring resource sharing. 97 | INFO: [BIND 205-101] Binding ... 98 | INFO: [BIND 205-100] Finished micro-architecture generation. 99 | INFO: [HLS 200-111] Finished Binding: CPU user time: 0.01 seconds. CPU system time: 0 seconds. Elapsed time: 0.01 seconds; current allocated memory: 675.617 MB. 100 | INFO: [HLS 200-10] ---------------------------------------------------------------- 101 | INFO: [HLS 200-10] -- Generating RTL for module 'peakPicker_wrapper' 102 | INFO: [HLS 200-10] ---------------------------------------------------------------- 103 | INFO: [HLS 200-1030] Apply Unified Pipeline Control on module 'peakPicker_wrapper' pipeline 'ultra_main_loop' pipeline type 'loop pipeline' 104 | INFO: [RTGEN 206-100] Finished creating RTL model for 'peakPicker_wrapper'. 105 | INFO: [HLS 200-111] Finished Creating RTL model: CPU user time: 0.02 seconds. CPU system time: 0 seconds. Elapsed time: 0.02 seconds; current allocated memory: 675.617 MB. 106 | INFO: [HLS 200-10] ---------------------------------------------------------------- 107 | INFO: [HLS 200-10] -- Generating RTL for module 'peakPicker' 108 | INFO: [HLS 200-10] ---------------------------------------------------------------- 109 | INFO: [RTGEN 206-500] Setting interface mode on port 'peakPicker/xcorr' to 'ap_memory'. 110 | INFO: [RTGEN 206-500] Setting interface mode on port 'peakPicker/threshold' to 'ap_memory'. 111 | INFO: [RTGEN 206-500] Setting interface mode on port 'peakPicker/input_length' to 'ap_none'. 112 | INFO: [RTGEN 206-500] Setting interface mode on port 'peakPicker/locations' to 'ap_memory'. 113 | INFO: [RTGEN 206-500] Setting interface mode on port 'peakPicker/num_peaks' to 'ap_vld'. 114 | INFO: [RTGEN 206-500] Setting interface mode on function 'peakPicker' to 'ap_ctrl_hs'. 115 | INFO: [RTGEN 206-100] Finished creating RTL model for 'peakPicker'. 116 | INFO: [HLS 200-111] Finished Creating RTL model: CPU user time: 0.06 seconds. CPU system time: 0 seconds. Elapsed time: 0.07 seconds; current allocated memory: 676.344 MB. 117 | INFO: [HLS 200-111] Finished Generating all RTL models: CPU user time: 0.09 seconds. CPU system time: 0.01 seconds. Elapsed time: 0.1 seconds; current allocated memory: 679.273 MB. 118 | INFO: [HLS 200-111] Finished Updating report files: CPU user time: 0.14 seconds. CPU system time: 0.01 seconds. Elapsed time: 0.14 seconds; current allocated memory: 681.910 MB. 119 | INFO: [VHDL 208-304] Generating VHDL RTL for peakPicker. 120 | INFO: [VLOG 209-307] Generating Verilog RTL for peakPicker. 121 | INFO: [HLS 200-790] **** Loop Constraint Status: All loop constraints were satisfied. 122 | INFO: [HLS 200-789] **** Estimated Fmax: 310.46 MHz 123 | INFO: [HLS 200-2161] Finished Command csynth_design Elapsed time: 00:00:09; Allocated memory: 41.367 MB. 124 | INFO: [HLS 200-112] Total CPU user time: 6.22 seconds. Total CPU system time: 1.48 seconds. Total elapsed time: 12.27 seconds; peak allocated memory: 681.953 MB. 125 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # LLM-Aided FPGA Design Flow 2 | 3 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) 4 | [![HLS Version](https://img.shields.io/badge/HLS-2023.2-blue.svg)](https://www.xilinx.com/products/design-tools/vitis/vitis-hls.html) 5 | 6 | ## Overview 7 | 8 | This repository demonstrates a modern approach to FPGA design using Large Language Models (LLMs) to automate and enhance the design workflow from MATLAB algorithms to optimized hardware implementations. By leveraging LLMs like Claude 3.7 Sonnet, GPT-4, or GitHub Copilot, we significantly reduce development time while maintaining design quality. 9 | 10 | The repository showcases: 11 | 12 | 1. Conversion of MATLAB reference algorithms to HLS C++ 13 | 2. Automated debugging of C simulation errors 14 | 3. Prompt engineering techniques for hardware design tasks 15 | 4. Performance optimization through LLM-guided directives 16 | 17 | ## Case Study: 5G NR Peak Picker 18 | 19 | Our primary example is a peak picker algorithm for 5G NR Synchronization Signal Block (SSB) detection, which demonstrates the complete LLM-assisted workflow from MATLAB specification to optimized HLS implementation. 20 | 21 | ### Algorithm Description 22 | 23 | The peak picker algorithm: 24 | - Takes PSS (Primary Synchronization Signal) correlation magnitude squared values as input 25 | - Compares values against thresholds to identify candidate peaks 26 | - Applies filtering to identify true peaks 27 | - Returns the locations (indices) of detected peaks 28 | 29 | ## LLM-Based HLS Code Generation and Debugging Workflow 30 | 31 | Our comprehensive workflow automates the entire process from MATLAB algorithm to optimized HLS implementation: 32 | 33 | ```mermaid 34 | graph TB 35 | subgraph Inputs 36 | A[MATLAB Prototype Files] -->|Input| B(Generate HLS Code) 37 | P[Prompt Template] -->|Format| B 38 | end 39 | 40 | subgraph AI_Code_Generation [AI Code Generation Process] 41 | B -->|Creates Prompt| C{Select LLM Service} 42 | C -->|Default| D[Gemini API] 43 | C -->|Fallback| E[OpenAI API] 44 | C -->|Fallback| F[Claude API] 45 | 46 | D & E & F -->|Generate| G[LLM Response] 47 | G -->|Parse| H[Extract Code] 48 | H -->|Save| I[Generated HLS Files] 49 | end 50 | 51 | subgraph Outputs 52 | I -->|Header| J[component.hpp] 53 | I -->|Implementation| K[component.cpp] 54 | I -->|Testbench| L[component_tb.cpp] 55 | end 56 | 57 | subgraph Verification 58 | J & K & L -->|Compile & Run| M[C Simulation] 59 | M -->|Pass| N[HLS Synthesis] 60 | M -->|Fail| O[Error Logs] 61 | end 62 | 63 | subgraph AI_Debug_Assistant [AI Debug Assistant] 64 | O -->|Input| Q(Debug Assistant) 65 | J & K & L -->|Source Code| Q 66 | Q -->|Creates Debug Prompt| R{Select LLM Service} 67 | R -->|Default| S[Gemini API] 68 | R -->|Fallback| T[OpenAI API] 69 | R -->|Fallback| U[Claude API] 70 | 71 | S & T & U -->|Analyze| V[LLM Debug Analysis] 72 | V -->|Generate| W[Debug Report] 73 | V -->|Extract| X[Code Fixes] 74 | X -->|Optional| Y[Apply Fixes] 75 | Y -->|Update| J & K & L 76 | end 77 | 78 | style D fill:#34A853,stroke:#34A853,color:white 79 | style S fill:#34A853,stroke:#34A853,color:white 80 | style G fill:#F9AB00,stroke:#F9AB00,color:white 81 | style V fill:#F9AB00,stroke:#F9AB00,color:white 82 | style I fill:#4285F4,stroke:#4285F4,color:white 83 | style W fill:#4285F4,stroke:#4285F4,color:white 84 | ``` 85 | 86 | ### Workflow Stages 87 | 88 | #### 1. Input Stage 89 | - **MATLAB Prototype Files**: Reference algorithm implementation in MATLAB 90 | - **Prompt Template**: Structured instructions for the LLM to follow when generating HLS code 91 | 92 | #### 2. AI Code Generation Process 93 | - **Creates Prompt**: Combines MATLAB code with template for comprehensive context 94 | - **Select LLM Service**: Chooses between Gemini (default), OpenAI, or Claude APIs 95 | - **LLM Response**: Raw text response containing code and explanations 96 | - **Extract Code**: Parses response to identify different file types and code sections 97 | - **Generated HLS Files**: Creates properly structured C++ files ready for simulation 98 | 99 | #### 3. Output Stage 100 | - **Header File**: Contains class definitions, function declarations, and constants 101 | - **Implementation File**: Contains the core HLS algorithm implementation with pragmas 102 | - **Testbench File**: Includes data loading, function calls, and verification logic 103 | 104 | #### 4. Verification Stage 105 | - **C Simulation**: Compile and test the generated code for functional correctness 106 | - **HLS Synthesis**: If simulation passes, proceed to hardware synthesis 107 | - **Error Logs**: If simulation fails, collect error information for debugging 108 | 109 | #### 5. AI Debug Assistant Stage 110 | - **Debug Assistant**: Takes error logs and source files as input 111 | - **Creates Debug Prompt**: Structures the debugging context for LLM analysis 112 | - **LLM Analysis**: AI analyzes errors and suggests specific code fixes 113 | - **Debug Report**: Comprehensive explanation of issues and solutions 114 | - **Code Fixes**: Specific code changes that can be automatically applied 115 | - **Apply Fixes**: Update source files with AI-suggested corrections 116 | 117 | ### Prompt Engineering for Code Generation 118 | 119 | We've developed specialized prompt templates for effective code generation: 120 | 121 | 1. **Context Section**: Explains the algorithm purpose and background 122 | 2. **Task Description**: Clearly defines what the LLM needs to implement 123 | 3. **Implementation Requirements**: Specifies coding standards, interfaces, and optimizations 124 | 4. **Deliverables**: Clearly states what files should be produced 125 | 126 | Example from our peak picker implementation: 127 | 128 | ```markdown 129 | # Copilot Instructions for Peak Picker Implementation 130 | 131 | ## Project Context 132 | This project implements a critical component of a 5G NR SSB detection application. 133 | The peak picker algorithm identifies SSB signals by locating peaks where the 134 | magnitude squared of the PSS correlation (`xcorr`) exceeds a predefined threshold. 135 | 136 | ## Task Description 137 | Your task is to translate the MATLAB peak picker algorithm into efficient HLS C++ 138 | code while preserving exact functionality. The implementation should be optimized 139 | for FPGA deployment using Xilinx HLS directives. 140 | 141 | [Additional sections...] 142 | ``` 143 | 144 | ## How the Debug Assistant Works 145 | 146 | The debug assistant provides automated, AI-powered analysis and correction of HLS simulation errors: 147 | 148 | ```mermaid 149 | graph TD 150 | subgraph Inputs 151 | A[Error Log] -->|read_file| C 152 | B[HLS C++ Source Files] -->|read_file| D 153 | end 154 | 155 | subgraph Processing 156 | C[Extract Error Information] --> E 157 | D[Parse Source Code] --> E 158 | E[Create Debug Prompt] --> F 159 | end 160 | 161 | subgraph LLM_Analysis 162 | F[Query LLM API] -->|model selection| G{Select Model} 163 | G -->|gemini-2.0-pro-exp| H[Gemini API] 164 | G -->|gpt-4/gpt-3.5-turbo| I[OpenAI API] 165 | G -->|claude-sonnet| J[Claude API] 166 | H --> K[LLM Analysis Response] 167 | I --> K 168 | J --> K 169 | end 170 | 171 | subgraph Outputs 172 | K --> L[Generate Debug Report] 173 | K --> M[Parse Code Corrections] 174 | 175 | L --> N[Save Markdown Report] 176 | M --> O[Apply Code Fixes] 177 | O -->|user confirmation| P[Edit Source Files] 178 | end 179 | 180 | style H fill:#34A853,stroke:#34A853,color:white 181 | style K fill:#F9AB00,stroke:#F9AB00,color:white 182 | style P fill:#4285F4,stroke:#4285F4,color:white 183 | style N fill:#4285F4,stroke:#4285F4,color:white 184 | ``` 185 | 186 | ### Debug Workflow Stages 187 | 188 | #### 1. Inputs Processing 189 | - **Error Log Analysis**: Extracts meaningful error patterns from C simulation logs 190 | - **Source Code Parsing**: Gathers relevant source files to provide complete context 191 | 192 | #### 2. Processing 193 | - **Extract Error Information**: Identifies specific error messages and patterns 194 | - **Parse Source Code**: Organizes code context for the LLM 195 | - **Create Debug Prompt**: Structures the debugging request with all relevant information 196 | 197 | #### 3. LLM Analysis 198 | - **Query LLM API**: Sends the prompt to the selected AI service 199 | - **Model Selection**: Chooses between Gemini (primary), GPT, or Claude models 200 | - **LLM Response**: AI analyzes the issues and provides detailed debugging guidance 201 | 202 | #### 4. Outputs 203 | - **Generate Debug Report**: Creates detailed markdown reports explaining errors and fixes 204 | - **Parse Code Corrections**: Extracts specific code changes from the LLM response 205 | - **Apply Code Fixes**: Optionally implements the suggested changes with user confirmation 206 | - **Edit Source Files**: Updates the original files with proper change tracking 207 | 208 | The debug assistant handles common HLS errors including: 209 | - Interface mismatches between implementation and testbench 210 | - Data type inconsistencies 211 | - Indexing errors 212 | - Algorithmic logical errors 213 | - Misunderstandings of HLS-specific behaviors 214 | 215 | ## LLM Selection and Integration 216 | 217 | Our tools support multiple LLM providers with different capabilities: 218 | 219 | - **Gemini Pro/Flash**: Offers strong reasoning about code structures and efficient debugging 220 | - **GPT-3.5/4**: Provides detailed code generation with comprehensive comments 221 | - **Claude Sonnet**: Excels at understanding complex algorithms and providing thorough explanations 222 | 223 | The framework automatically selects appropriate models based on task complexity, or allows specifying a model for specific use cases. 224 | 225 | ## Automated File Generation and Management 226 | 227 | The `generate_hls_code.py` tool implements sophisticated code extraction algorithms to: 228 | 229 | - Parse LLM responses for code blocks 230 | - Identify appropriate file types (header, implementation, testbench) 231 | - Generate properly formatted HLS C++ files 232 | - Maintain correct dependencies between files 233 | - Create project structures compatible with Vitis HLS 234 | 235 | ## Getting Started 236 | 237 | ### Prerequisites 238 | 239 | - Vitis HLS 2023.2 or newer 240 | - MATLAB R2023a or newer (for reference models) 241 | - Python 3.8+ with necessary libraries for data handling 242 | - API keys for supported LLM services (at least one of the following): 243 | - Google Gemini API key (recommended) 244 | - OpenAI API key 245 | - Anthropic Claude API key 246 | 247 | ### Installation 248 | 249 | ```bash 250 | # Clone this repository 251 | git clone https://github.com/rockyco/llm-fpga-design.git 252 | cd llm-fpga-design 253 | 254 | # Set up your environment 255 | source /path/to/Vitis/settings64.sh 256 | 257 | # Install required Python packages 258 | pip install -r requirements.txt 259 | 260 | # Add your API keys to the .bashrc or .env file 261 | echo "GEMINI_API_KEY=your_gemini_api_key" >> ~/.bashrc 262 | echo "OPENAI_API_KEY=your_openai_api_key" >> ~/.bashrc 263 | echo "CLAUDE_API_KEY=your_claude_api_key" >> ~/.bashrc 264 | source ~/.bashrc 265 | ``` 266 | 267 | ### Usage 268 | 269 | 1. **Generate HLS C++ from MATLAB reference**: 270 | Supported models: `gemini-2.0-flash-thinking-exp`, `gemini-2.0-pro-exp`, `gpt-4`, `gpt-3.5-turbo`, `claude-sonnet` 271 | ```bash 272 | python3 scripts/generate_hls_code.py --matlab_file algorithms/peakPicker.m algorithms/peakPicker_tb.m --prompt prompts/hls_conversion.md --model gemini-2.0-flash-thinking-exp 273 | ``` 274 | 275 | 2. **Run C simulation**: 276 | ```bash 277 | cd implementations/peakPicker 278 | make csim 279 | ``` 280 | 281 | 3. **Debug errors with LLM assistance**: 282 | ```bash 283 | cd ../../ 284 | python3 scripts/debug_assistant.py --error_log implementations/peakPicker/proj_peakPicker/solution1/csim/report/peakPicker_csim.log --source_file implementations/peakPicker/peakPicker.cpp implementations/peakPicker/peakPicker.hpp implementations/peakPicker/peakPicker_tb.cpp 285 | ``` 286 | 287 | 4. **Synthesize and export RTL**: 288 | ```bash 289 | make csynth 290 | make export_ip 291 | ``` 292 | 293 | ## Code Generation Process 294 | 295 | The `generate_hls_code.py` script implements a comprehensive code generation pipeline: 296 | 297 | 1. **Code Analysis**: Examines MATLAB reference to understand algorithm function 298 | 2. **Prompt Construction**: Combines specialized templates with example code 299 | 3. **Model Selection**: Uses the most appropriate LLM based on task needs 300 | 4. **Response Processing**: Implements robust parsing to extract code blocks 301 | 5. **Code Organization**: Creates properly structured HLS project files 302 | 6. **Documentation**: Automatically preserves explanations from the LLM 303 | 304 | Key features include: 305 | - Support for multi-file MATLAB input 306 | - Robust code block extraction with multiple fallback strategies 307 | - File type identification based on content patterns 308 | - Project structure generation following HLS best practices 309 | 310 | ## Repository Structure 311 | 312 | ``` 313 | llm-fpga-design/ 314 | ├── algorithms/ # MATLAB reference implementations 315 | ├── implementations/ # Generated HLS C++ implementations 316 | │ └── peakPicker/ # Peak Picker implementation case study 317 | ├── prompts/ # LLM prompt templates 318 | ├── scripts/ # Automation scripts 319 | │ ├── generate_hls_code.py # Code generation script 320 | │ └── debug_assistant.py # Debugging assistant script 321 | ├── data/ # Test data files 322 | └── docs/ # Documentation 323 | ``` 324 | 325 | ## Best Practices 326 | 327 | Based on our experience, we recommend these best practices for LLM-assisted FPGA design: 328 | 329 | 1. **Structured Prompts**: Use clear, detailed prompts with specific sections for context, requirements, and deliverables 330 | 2. **Iterative Refinement**: Start with high-level requirements, then refine implementation details 331 | 3. **Input/Output Examples**: Provide concrete examples of expected behavior 332 | 4. **Domain-Specific Knowledge**: Include relevant HLS and FPGA concepts in prompts 333 | 5. **Error Analysis**: When debugging, provide complete error messages and surrounding context 334 | 6. **Model Selection**: Choose appropriate models for different tasks: 335 | - Use Gemini Flash for quick iterations and debugging 336 | - Use GPT-4 for complex algorithms needing careful implementation 337 | - Use Claude for detailed explanations and educational contexts 338 | 7. **Prompt Templates**: Maintain a library of effective prompt templates for reuse 339 | 8. **Human Review**: Always review and understand generated code before synthesis 340 | 341 | ## Limitations and Considerations 342 | 343 | - LLMs may not be aware of the latest HLS features or hardware-specific optimizations 344 | - Complex timing constraints might require manual refinement 345 | - While LLMs can generate optimized code, expert review is still recommended for critical applications 346 | - Actual hardware performance should be verified through physical implementation 347 | - LLMs may occasionally: 348 | - Generate incorrect pragma syntax that needs manual correction 349 | - Not fully understand resource vs. performance tradeoffs 350 | - Struggle with very complex interface requirements 351 | - Need help with target-specific optimizations 352 | 353 | ## Contributing 354 | 355 | Contributions are welcome! Please feel free to submit a Pull Request. 356 | 357 | 1. Fork the repository 358 | 2. Create your feature branch (`git checkout -b feature/amazing-feature`) 359 | 3. Commit your changes (`git commit -m 'Add some amazing feature'`) 360 | 4. Push to the branch (`git push origin feature/amazing-feature`) 361 | 5. Open a Pull Request 362 | 363 | ## License 364 | 365 | This project is licensed under the MIT License - see the LICENSE file for details. 366 | 367 | ## Acknowledgments 368 | 369 | - Thanks to the open-source HLS and FPGA design communities 370 | - Special thanks to the developers of Google Gemini 2.5 pro API, Claude 3.7 Sonnet, and GitHub Copilot for enabling this workflow 371 | 372 | -------------------------------------------------------------------------------- /scripts/optimize_hls_code.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | import os 5 | import sys 6 | import requests 7 | import json 8 | import re 9 | import openai 10 | import google.generativeai as genai 11 | from pathlib import Path 12 | from datetime import datetime 13 | from dotenv import load_dotenv 14 | 15 | # Load environment variables for API keys 16 | load_dotenv() 17 | 18 | # Get API keys from environment variables 19 | OPENAI_API_KEY = os.getenv('OPENAI_API_KEY') 20 | GEMINI_API_KEY = os.getenv('GEMINI_API_KEY') 21 | CLAUDE_API_KEY = os.getenv('CLAUDE_API_KEY') 22 | 23 | def parse_arguments(): 24 | """Parse command line arguments.""" 25 | parser = argparse.ArgumentParser(description='Optimize HLS C++ code for better performance using LLM') 26 | parser.add_argument('--source_dir', required=True, 27 | help='Directory containing HLS source files to optimize') 28 | parser.add_argument('--prompt', required=False, 29 | help='Path to prompt template file (or prompt name)') 30 | parser.add_argument('--output_dir', default=None, 31 | help='Directory to save optimized HLS code (defaults to source_dir)') 32 | parser.add_argument('--model', default='gemini-2.5-pro-exp-03-25', 33 | help='LLM model to use') 34 | parser.add_argument('--primary_goal', required=False, default="Reduce latency", 35 | help='Primary optimization goal (e.g., "Reduce latency by 30%")') 36 | parser.add_argument('--secondary_goal', required=False, default="Maintain resource usage", 37 | help='Secondary optimization goal (e.g., "Maintain resource usage")') 38 | parser.add_argument('--api_key', 39 | help='API key for LLM service') 40 | return parser.parse_args() 41 | 42 | def read_file(file_path): 43 | """Read and return the content of a file.""" 44 | try: 45 | with open(file_path, 'r') as f: 46 | return f.read() 47 | except Exception as e: 48 | print(f"Error reading file {file_path}: {e}") 49 | sys.exit(1) 50 | 51 | def find_source_files(source_dir): 52 | """Find all relevant HLS source files in the directory.""" 53 | source_files = {} 54 | for ext in ['.cpp', '.hpp', '.h', '_tb.cpp']: 55 | for file in Path(source_dir).glob(f'*{ext}'): 56 | source_files[file.name] = str(file) 57 | 58 | # Also look for csynth.rpt or other report files 59 | for report_file in Path(source_dir).glob('**/csynth.rpt'): 60 | source_files['csynth.rpt'] = str(report_file) 61 | 62 | # Look for implementation reports 63 | for report_file in Path(source_dir).glob('**/verilog/report/**/*.rpt'): 64 | source_files[f'report_{report_file.name}'] = str(report_file) 65 | 66 | return source_files 67 | 68 | def extract_performance_metrics(source_dir): 69 | """Extract performance metrics from synthesis and implementation reports.""" 70 | metrics = {} 71 | 72 | # Look for csynth.rpt 73 | csynth_path = None 74 | for path in Path(source_dir).glob('**/csynth.rpt'): 75 | csynth_path = path 76 | break 77 | 78 | if csynth_path: 79 | try: 80 | csynth_content = read_file(str(csynth_path)) 81 | 82 | # Extract latency information 83 | latency_match = re.search(r'Latency \(cycles\)\s*\|\s*min\s*\|\s*max\s*\|\s*min/max\s*\|\s*\n\s*\|\s*-+\s*\|\s*-+\s*\|\s*-+\s*\|\s*\n\s*\|\s*(\d+)\s*\|\s*(\d+)', csynth_content) 84 | if latency_match: 85 | metrics['latency_min'] = int(latency_match.group(1)) 86 | metrics['latency_max'] = int(latency_match.group(2)) 87 | 88 | # Extract resource utilization 89 | resource_pattern = r'(\|\s*([A-Za-z0-9]+)\s*\|\s*(\d+)\s*\|\s*(\d+)\s*\|\s*(\d+)\s*\|\s*(\d+%?\s*)\|)' 90 | resource_matches = re.findall(resource_pattern, csynth_content) 91 | 92 | if resource_matches: 93 | metrics['resources'] = {} 94 | for match in resource_matches: 95 | resource_type = match[1].strip() 96 | used = int(match[2]) 97 | total = int(match[3]) if match[3] != '0' else 0 98 | metrics['resources'][resource_type] = { 99 | 'used': used, 100 | 'total': total, 101 | 'utilization': f"{(used/total*100):.2f}%" if total > 0 else "N/A" 102 | } 103 | except Exception as e: 104 | print(f"Error extracting metrics from csynth.rpt: {e}") 105 | 106 | # Format metrics as a string for the prompt 107 | metrics_str = "## Performance Metrics\n\n" 108 | 109 | if 'latency_min' in metrics: 110 | metrics_str += f"### Latency\n- Minimum: {metrics['latency_min']} cycles\n- Maximum: {metrics['latency_max']} cycles\n\n" 111 | 112 | if 'resources' in metrics: 113 | metrics_str += "### Resource Utilization\n" 114 | for resource, values in metrics['resources'].items(): 115 | metrics_str += f"- {resource}: {values['used']} used / {values['total']} total ({values['utilization']})\n" 116 | 117 | if metrics_str == "## Performance Metrics\n\n": 118 | metrics_str += "No performance metrics available from synthesis reports." 119 | 120 | return metrics_str, metrics 121 | 122 | def create_optimization_prompt(source_files, performance_metrics, prompt_template, primary_goal, secondary_goal): 123 | """Create the prompt for code optimization.""" 124 | # Load source file contents 125 | source_contents = {} 126 | for name, path in source_files.items(): 127 | if name.endswith(('.cpp', '.hpp', '.h', '_tb.cpp')): 128 | try: 129 | source_contents[name] = read_file(path) 130 | except: 131 | source_contents[name] = f"Error reading {path}" 132 | 133 | # Create source files section 134 | source_files_str = "## Source Files\n\n" 135 | for name, content in source_contents.items(): 136 | source_files_str += f"### {name}\n```cpp\n{content}\n```\n\n" 137 | 138 | # Replace placeholders in template 139 | prompt = prompt_template 140 | replacements = { 141 | "SOURCE_FILES": source_files_str, 142 | "PERFORMANCE_METRICS": performance_metrics, 143 | "PRIMARY_GOAL": primary_goal, 144 | "SECONDARY_GOAL": secondary_goal 145 | } 146 | 147 | for key, value in replacements.items(): 148 | if f"{{{{{key}}}}}" in prompt: 149 | prompt = prompt.replace(f"{{{{{key}}}}}", value) 150 | 151 | return prompt 152 | 153 | def query_openai(prompt, model="gpt-4"): 154 | """Send a prompt to OpenAI API and get the response.""" 155 | if not OPENAI_API_KEY: 156 | print("Error: OPENAI_API_KEY environment variable not set.") 157 | sys.exit(1) 158 | 159 | url = "https://api.openai.com/v1/chat/completions" 160 | headers = { 161 | "Content-Type": "application/json", 162 | "Authorization": f"Bearer {OPENAI_API_KEY}" 163 | } 164 | data = { 165 | "model": model, 166 | "messages": [ 167 | {"role": "system", "content": "You are an expert FPGA developer specializing in HLS C++ optimization."}, 168 | {"role": "user", "content": prompt} 169 | ], 170 | "temperature": 0.1 # Lower temperature for more deterministic output 171 | } 172 | 173 | try: 174 | response = requests.post(url, headers=headers, data=json.dumps(data)) 175 | response.raise_for_status() # Raise exception for HTTP errors 176 | return response.json()["choices"][0]["message"]["content"] 177 | except requests.exceptions.RequestException as e: 178 | print(f"Error calling OpenAI API: {e}") 179 | if hasattr(e, 'response') and e.response: 180 | print(f"Response: {e.response.text}") 181 | sys.exit(1) 182 | 183 | def query_claude(prompt, model="claude-3-sonnet-20240229"): 184 | """Send a prompt to Anthropic Claude API and get the response.""" 185 | if not CLAUDE_API_KEY: 186 | print("Error: CLAUDE_API_KEY environment variable not set.") 187 | sys.exit(1) 188 | 189 | url = "https://api.anthropic.com/v1/messages" 190 | headers = { 191 | "Content-Type": "application/json", 192 | "x-api-key": CLAUDE_API_KEY, 193 | "anthropic-version": "2023-06-01" 194 | } 195 | data = { 196 | "model": model, 197 | "messages": [{"role": "user", "content": prompt}], 198 | "temperature": 0.1 199 | } 200 | 201 | try: 202 | response = requests.post(url, headers=headers, json=data) 203 | response.raise_for_status() 204 | return response.json()["content"][0]["text"] 205 | except requests.exceptions.RequestException as e: 206 | print(f"Error calling Claude API: {e}") 207 | if hasattr(e, 'response') and e.response: 208 | print(f"Response: {e.response.text}") 209 | sys.exit(1) 210 | 211 | def query_gemini(prompt, model="gemini-2.5-pro-exp-03-25"): 212 | """Send a prompt to Google Gemini API and get the response.""" 213 | if not GEMINI_API_KEY: 214 | print("Error: GEMINI_API_KEY environment variable not set.") 215 | sys.exit(1) 216 | 217 | # Extract the model name for the URL 218 | url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent" 219 | 220 | headers = { 221 | "Content-Type": "application/json" 222 | } 223 | params = { 224 | "key": GEMINI_API_KEY 225 | } 226 | data = { 227 | "contents": [{"parts": [{"text": "You are an expert FPGA developer specializing in HLS C++ optimization.\n\n" + prompt}]}], 228 | "generationConfig": { 229 | "temperature": 0.1 230 | } 231 | } 232 | 233 | try: 234 | response = requests.post(url, headers=headers, params=params, json=data) 235 | response.raise_for_status() 236 | return response.json()["candidates"][0]["content"]["parts"][0]["text"] 237 | except requests.exceptions.RequestException as e: 238 | print(f"Error calling Gemini API: {e}") 239 | if hasattr(e, 'response') and e.response: 240 | print(f"Response: {e.response.text}") 241 | sys.exit(1) 242 | 243 | def query_llm(prompt, model="gemini-2.5-pro-exp-03-25"): 244 | """Route the query to the appropriate LLM API based on the model.""" 245 | if model.startswith("gemini"): 246 | return query_gemini(prompt, model) 247 | elif model.startswith("gpt"): 248 | return query_openai(prompt, model) 249 | elif model.startswith("claude"): 250 | return query_claude(prompt, model) 251 | else: 252 | print(f"Error: Unsupported model {model}.") 253 | sys.exit(1) 254 | 255 | def extract_optimized_code(llm_response): 256 | """Extract optimized code blocks from LLM response.""" 257 | optimized_code = {} 258 | 259 | # Extract all code blocks with filenames 260 | filename_patterns = [ 261 | r'###\s+([a-zA-Z0-9_]+\.[ch]pp)\s*```cpp\s*(.*?)```', 262 | r'File:\s*([a-zA-Z0-9_]+\.[ch]pp)\s*```cpp\s*(.*?)```', 263 | r'```cpp\s*//\s*([a-zA-Z0-9_]+\.[ch]pp)\s*(.*?)```' 264 | ] 265 | 266 | for pattern in filename_patterns: 267 | matches = re.findall(pattern, llm_response, re.DOTALL) 268 | for filename, code in matches: 269 | optimized_code[filename.strip()] = code.strip() 270 | 271 | return optimized_code 272 | 273 | def apply_optimizations(source_dir, output_dir, optimized_code, llm_response): 274 | """Apply the optimized code to the files.""" 275 | # Create output directory if it doesn't exist 276 | if output_dir and output_dir != source_dir: 277 | os.makedirs(output_dir, exist_ok=True) 278 | else: 279 | output_dir = source_dir 280 | 281 | # Create backup directory 282 | backup_dir = os.path.join(source_dir, "backup_original") 283 | os.makedirs(backup_dir, exist_ok=True) 284 | 285 | # Backup original files 286 | for filename in optimized_code.keys(): 287 | source_file = os.path.join(source_dir, filename) 288 | if os.path.exists(source_file): 289 | backup_file = os.path.join(backup_dir, filename) 290 | try: 291 | with open(source_file, 'r') as src, open(backup_file, 'w') as dst: 292 | dst.write(src.read()) 293 | print(f"Backed up {filename} to {backup_dir}/") 294 | except Exception as e: 295 | print(f"Error backing up {filename}: {e}") 296 | 297 | # Write optimized code to files 298 | for filename, code in optimized_code.items(): 299 | output_file = os.path.join(output_dir, filename) 300 | try: 301 | with open(output_file, 'w') as f: 302 | f.write(code) 303 | print(f"Applied optimization to {filename}") 304 | except Exception as e: 305 | print(f"Error writing optimized code to {filename}: {e}") 306 | 307 | # Create a log file with the optimization details 308 | log_file = os.path.join(output_dir, "optimization_log.md") 309 | with open(log_file, 'w') as f: 310 | f.write("# HLS Code Optimization Log\n\n") 311 | f.write(f"Optimization performed on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n") 312 | f.write("## Files Modified\n\n") 313 | for filename in optimized_code.keys(): 314 | f.write(f"- {filename}\n") 315 | f.write("\n## Optimizations Applied\n\n") 316 | f.write(llm_response) 317 | 318 | print(f"Optimization log saved to {log_file}") 319 | 320 | return log_file 321 | 322 | def main(): 323 | args = parse_arguments() 324 | 325 | # Load environment variables for API keys 326 | load_dotenv() 327 | 328 | # Set API key from args or environment variable 329 | api_key = args.api_key 330 | if api_key: 331 | if "gemini" in args.model.lower(): 332 | os.environ['GEMINI_API_KEY'] = api_key 333 | elif "claude" in args.model.lower(): 334 | os.environ['CLAUDE_API_KEY'] = api_key 335 | else: 336 | os.environ['OPENAI_API_KEY'] = api_key 337 | 338 | # Find source files 339 | source_files = find_source_files(args.source_dir) 340 | if not source_files: 341 | print(f"Error: No source files found in {args.source_dir}") 342 | sys.exit(1) 343 | 344 | print(f"Found {len(source_files)} source files in {args.source_dir}") 345 | for name in source_files: 346 | print(f" - {name}") 347 | 348 | # Extract performance metrics 349 | performance_metrics_str, metrics = extract_performance_metrics(args.source_dir) 350 | 351 | # Set output directory 352 | output_dir = args.output_dir if args.output_dir else args.source_dir 353 | 354 | # Load prompt template 355 | if args.prompt and os.path.isfile(args.prompt): 356 | prompt_template = read_file(args.prompt) 357 | else: 358 | # Try to find the performance_optimization.md prompt 359 | script_dir = os.path.dirname(os.path.abspath(__file__)) 360 | project_dir = os.path.dirname(script_dir) 361 | prompt_path = os.path.join(project_dir, "prompts", "performance_optimization.md") 362 | 363 | if os.path.isfile(prompt_path): 364 | prompt_template = read_file(prompt_path) 365 | else: 366 | print("Error: Could not find performance_optimization.md prompt") 367 | basic_template = """# HLS Performance Optimization 368 | 369 | Please analyze the provided HLS source code and suggest optimizations to improve performance based on the provided metrics. 370 | 371 | ## Source Files 372 | {{SOURCE_FILES}} 373 | 374 | ## Performance Metrics 375 | {{PERFORMANCE_METRICS}} 376 | 377 | ## Optimization Goals 378 | - Primary goal: {{PRIMARY_GOAL}} 379 | - Secondary goal: {{SECONDARY_GOAL}} 380 | 381 | Please suggest specific HLS pragmas and code modifications to achieve these goals. 382 | """ 383 | prompt_template = basic_template 384 | print("Using basic optimization prompt template") 385 | 386 | # Create the optimization prompt 387 | optimization_prompt = create_optimization_prompt( 388 | source_files, 389 | performance_metrics_str, 390 | prompt_template, 391 | args.primary_goal, 392 | args.secondary_goal 393 | ) 394 | 395 | # Get component name from directory 396 | component_name = os.path.basename(os.path.normpath(args.source_dir)) 397 | 398 | print(f"Generating optimizations for {component_name}...") 399 | print(f"Using model: {args.model}") 400 | print(f"Primary goal: {args.primary_goal}") 401 | print(f"Secondary goal: {args.secondary_goal}") 402 | 403 | # Call the LLM 404 | llm_response = query_llm(optimization_prompt, args.model) 405 | 406 | # Extract optimized code 407 | optimized_code = extract_optimized_code(llm_response) 408 | 409 | if not optimized_code: 410 | print("Warning: No optimized code blocks detected in the LLM response.") 411 | # Save the full response 412 | response_path = os.path.join(args.source_dir, "optimization_suggestions.md") 413 | with open(response_path, 'w') as f: 414 | f.write(llm_response) 415 | print(f"Saved optimization suggestions to {response_path}") 416 | sys.exit(1) 417 | 418 | # Apply optimizations 419 | log_file = apply_optimizations(args.source_dir, output_dir, optimized_code, llm_response) 420 | 421 | print("\nOptimization complete!") 422 | print(f"Optimized {len(optimized_code)} files") 423 | print(f"Output directory: {output_dir}") 424 | print(f"Optimization log: {log_file}") 425 | 426 | # Track feedback for prompt improvement 427 | feedback_file = os.path.join(project_dir, "prompt_feedback.json") 428 | os.makedirs(os.path.dirname(feedback_file), exist_ok=True) 429 | 430 | with open(feedback_file, "a") as f: 431 | feedback = { 432 | "prompt": "performance_optimization", 433 | "timestamp": datetime.now().isoformat(), 434 | "model": args.model, 435 | "component": component_name, 436 | "status": "success" if optimized_code else "partial", 437 | "files_optimized": len(optimized_code), 438 | "optimized_files": list(optimized_code.keys()) 439 | } 440 | f.write(json.dumps(feedback) + "\n") 441 | 442 | # Return the output directory for the orchestrator 443 | print(output_dir) 444 | 445 | if __name__ == "__main__": 446 | main() 447 | -------------------------------------------------------------------------------- /implementations/peakPicker/llm_response.md: -------------------------------------------------------------------------------- 1 | Okay, here is the HLS C++ implementation for the `peakPicker` component, following your detailed instructions. 2 | 3 | **Assumptions:** 4 | 5 | 1. **Fixed-Point Precision:** Since the exact range and required precision of the input `xcorr` and `threshold` signals are not specified, I'll use `ap_fixed<32, 16>` as a starting point. This provides 16 bits for the integer part (including the sign bit) and 16 bits for the fractional part. **This should be adjusted based on actual signal analysis for the specific application.** 6 | 2. **Input Data Format:** The input files (`peakPicker_in.txt`, `threshold_in.txt`) contain one floating-point number per line. 7 | 3. **Reference Output Format:** The reference file (`peakPicker_ref.txt`) contains one integer location index per line. 8 | 4. **Maximum Number of Samples:** The testbench assumes the input files contain all samples. The HLS function takes `numSamples` as an argument. 9 | 10 | --- 11 | 12 | **File: `peakPicker.hpp`** 13 | 14 | ```cpp 15 | #ifndef PEAK_PICKER_HPP 16 | #define PEAK_PICKER_HPP 17 | 18 | #include 19 | #include 20 | #include // For integer types if needed for indices 21 | 22 | //-------------------------------------------------------------------------- 23 | // Constants and Parameters 24 | //-------------------------------------------------------------------------- 25 | 26 | // Fixed-point type configuration (Adjust W and I based on signal analysis) 27 | constexpr int DATA_W = 32; // Total width 28 | constexpr int DATA_I = 16; // Integer width (including sign bit) 29 | 30 | // Sliding window configuration (Matches MATLAB reference) 31 | constexpr int WINDOW_LENGTH = 11; 32 | constexpr int MIDDLE_LOCATION = WINDOW_LENGTH / 2; // Index 5 (0-based) 33 | 34 | // Index type configuration (Ensure it can hold the maximum sample index) 35 | constexpr int INDEX_W = 32; // Width for location indices 36 | 37 | //-------------------------------------------------------------------------- 38 | // Type Definitions 39 | //-------------------------------------------------------------------------- 40 | 41 | // Fixed-point type for input data (xcorr and threshold) 42 | typedef ap_fixed Data_t; 43 | 44 | // Type for output location indices 45 | // Using ap_uint for non-negative indices. Use ap_int if indices can be negative. 46 | // Or simply use 'int' if standard integer sizes are sufficient and synthesizable. 47 | typedef ap_uint Index_t; 48 | // typedef int Index_t; // Alternative if standard int is sufficient 49 | 50 | //-------------------------------------------------------------------------- 51 | // Function Declaration 52 | //-------------------------------------------------------------------------- 53 | 54 | /** 55 | * @brief Finds peaks in a cross-correlation signal using a sliding window. 56 | * 57 | * @param xcorrStream Input stream of cross-correlation magnitude squared values. 58 | * @param thresholdStream Input stream of threshold values (corresponding to xcorr samples). 59 | * @param locationStream Output stream for detected peak location indices. 60 | * @param numSamples Total number of samples to process from the input streams. 61 | */ 62 | void peakPicker( 63 | hls::stream& xcorrStream, 64 | hls::stream& thresholdStream, 65 | hls::stream& locationStream, 66 | int numSamples 67 | ); 68 | 69 | #endif // PEAK_PICKER_HPP 70 | ``` 71 | 72 | --- 73 | 74 | **File: `peakPicker.cpp`** 75 | 76 | ```cpp 77 | #include "peakPicker.hpp" 78 | 79 | /** 80 | * @brief Core implementation of the peakPicker algorithm for HLS. 81 | * 82 | * Implements a sliding window peak detector. A sample is considered a peak if: 83 | * 1. It is the maximum value within a window of WINDOW_LENGTH samples centered around it. 84 | * 2. Its value exceeds the corresponding threshold value. 85 | * 86 | * This function processes streaming input data and outputs the indices of detected peaks. 87 | */ 88 | void peakPicker( 89 | hls::stream& xcorrStream, 90 | hls::stream& thresholdStream, 91 | hls::stream& locationStream, 92 | int numSamples 93 | ) { 94 | // Use function scope for pragmas if applying to the whole function's interface 95 | // #pragma HLS INTERFACE axis port=xcorrStream 96 | // #pragma HLS INTERFACE axis port=thresholdStream 97 | // #pragma HLS INTERFACE axis port=locationStream 98 | // #pragma HLS INTERFACE s_axilite port=numSamples bundle=control 99 | // #pragma HLS INTERFACE s_axilite port=return bundle=control 100 | 101 | // Buffers to hold the current window of data and thresholds 102 | // These implement the sliding window mechanism. 103 | Data_t xcorrBuffer[WINDOW_LENGTH]; 104 | Data_t thresholdBuffer[WINDOW_LENGTH]; 105 | 106 | // Partitioning the arrays allows parallel access to elements within the 107 | // pipelined loop, mapping them to registers for II=1. 108 | #pragma HLS ARRAY_PARTITION variable=xcorrBuffer complete dim=1 109 | #pragma HLS ARRAY_PARTITION variable=thresholdBuffer complete dim=1 110 | 111 | // Initialize buffers (optional, but good practice for simulation) 112 | // Can be skipped if the initial state doesn't affect the first valid output 113 | for (int i = 0; i < WINDOW_LENGTH; ++i) { 114 | #pragma HLS UNROLL 115 | xcorrBuffer[i] = 0; 116 | thresholdBuffer[i] = 0; 117 | } 118 | 119 | // Main processing loop iterates through all input samples 120 | // Apply PIPELINE directive for high throughput (initiation interval II=1) 121 | main_loop: 122 | for (int i = 0; i < numSamples; ++i) { 123 | #pragma HLS PIPELINE II=1 124 | 125 | // 1. Shift Buffers: Make space for the new sample at index 0 126 | // Shift existing elements towards the end of the buffer 127 | shift_loop: 128 | for (int k = WINDOW_LENGTH - 1; k > 0; --k) { 129 | #pragma HLS UNROLL // Unroll this small loop for efficiency 130 | xcorrBuffer[k] = xcorrBuffer[k - 1]; 131 | thresholdBuffer[k] = thresholdBuffer[k - 1]; 132 | } 133 | 134 | // 2. Read New Samples: Read from input streams and place at the start (index 0) 135 | Data_t newXcorrSample = xcorrStream.read(); 136 | Data_t newThresholdSample = thresholdStream.read(); 137 | xcorrBuffer[0] = newXcorrSample; 138 | thresholdBuffer[0] = newThresholdSample; 139 | 140 | // 3. Peak Detection Logic: Start after the window is filled 141 | // The first potential peak can be checked when i = WINDOW_LENGTH - 1 142 | // At this point, the sample corresponding to the middle of the *first full window* 143 | // is located at xcorrBuffer[MIDDLE_LOCATION]. This sample was originally read 144 | // at iteration i = (WINDOW_LENGTH - 1) - MIDDLE_LOCATION. 145 | if (i >= WINDOW_LENGTH - 1) { 146 | // Get the sample at the center of the current window 147 | Data_t middleSample = xcorrBuffer[MIDDLE_LOCATION]; 148 | Data_t middleThreshold = thresholdBuffer[MIDDLE_LOCATION]; 149 | 150 | // Check if the middle sample is the maximum in the window 151 | bool isPeak = true; // Assume it's a peak initially 152 | compare_loop: 153 | for (int k = 0; k < WINDOW_LENGTH; ++k) { 154 | #pragma HLS UNROLL // Unroll comparison loop for parallelism 155 | // Check if any other sample in the window is strictly greater 156 | // Note: MATLAB's `all(CompareOut <= 0)` means middleSample >= all others. 157 | if (k != MIDDLE_LOCATION && xcorrBuffer[k] > middleSample) { 158 | isPeak = false; 159 | break; // Exit loop early if not a peak 160 | } 161 | } 162 | 163 | // Check if the peak condition is met (local maximum AND above threshold) 164 | if (isPeak && (middleSample > middleThreshold)) { 165 | // Calculate the index of the peak in the original input stream 166 | // The sample currently at middleBuffer[MIDDLE_LOCATION] was read 167 | // MIDDLE_LOCATION iterations ago. 168 | Index_t peakLocation = i - MIDDLE_LOCATION; 169 | 170 | // Write the detected peak location to the output stream 171 | locationStream.write(peakLocation); 172 | } 173 | } 174 | } // end main_loop 175 | } 176 | ``` 177 | 178 | --- 179 | 180 | **File: `peakPicker_tb.cpp`** 181 | 182 | ```cpp 183 | #include "peakPicker.hpp" 184 | #include 185 | #include 186 | #include 187 | #include 188 | #include // For std::abs 189 | #include // For numeric_limits 190 | #include // For std::setprecision 191 | 192 | // Define input/output file names 193 | const std::string XCORR_INPUT_FILE = "pssCorrMagSq_3_in.txt"; // Matches MATLAB TB 194 | const std::string THRESHOLD_INPUT_FILE = "threshold_in.txt"; // Matches MATLAB TB 195 | const std::string REF_OUTPUT_FILE = "locations_3_ref.txt"; // Matches MATLAB TB 196 | // const std::string REF_OUTPUT_FILE = "peakLocs_out.txt"; // Use this if comparing against MATLAB output file 197 | 198 | // Function to read data from a file into a vector of doubles 199 | bool readDataFile(const std::string& filename, std::vector& data) { 200 | std::ifstream infile(filename); 201 | if (!infile.is_open()) { 202 | std::cerr << "Error: Could not open file: " << filename << std::endl; 203 | return false; 204 | } 205 | double value; 206 | while (infile >> value) { 207 | data.push_back(value); 208 | } 209 | if (infile.bad()) { 210 | std::cerr << "Error: Failed reading data from file: " << filename << std::endl; 211 | infile.close(); 212 | return false; 213 | } 214 | infile.close(); 215 | if (data.empty()) { 216 | std::cerr << "Warning: No data read from file: " << filename << std::endl; 217 | // Decide if this is an error or acceptable 218 | } 219 | std::cout << "Read " << data.size() << " values from " << filename << std::endl; 220 | return true; 221 | } 222 | 223 | // Function to read integer data (locations) from a file 224 | bool readIntDataFile(const std::string& filename, std::vector& data) { 225 | std::ifstream infile(filename); 226 | if (!infile.is_open()) { 227 | std::cerr << "Error: Could not open file: " << filename << std::endl; 228 | return false; 229 | } 230 | int value; 231 | while (infile >> value) { 232 | data.push_back(value); 233 | } 234 | if (infile.bad()) { 235 | std::cerr << "Error: Failed reading data from file: " << filename << std::endl; 236 | infile.close(); 237 | return false; 238 | } 239 | infile.close(); 240 | if (data.empty()) { 241 | std::cerr << "Warning: No data read from file: " << filename << std::endl; 242 | // Decide if this is an error or acceptable 243 | } 244 | std::cout << "Read " << data.size() << " values from " << filename << std::endl; 245 | return true; 246 | } 247 | 248 | 249 | int main() { 250 | std::cout << "--- Starting Peak Picker Testbench ---" << std::endl; 251 | 252 | // --- Data Loading --- 253 | std::vector xcorrVec, thresholdVec; 254 | std::vector refLocsVec; // Use int for reference locations 255 | 256 | std::cout << "Loading input data..." << std::endl; 257 | if (!readDataFile(XCORR_INPUT_FILE, xcorrVec)) return 1; 258 | if (!readDataFile(THRESHOLD_INPUT_FILE, thresholdVec)) return 1; 259 | 260 | std::cout << "Loading reference output data..." << std::endl; 261 | if (!readIntDataFile(REF_OUTPUT_FILE, refLocsVec)) return 1; 262 | 263 | // Basic input validation 264 | if (xcorrVec.size() != thresholdVec.size()) { 265 | std::cerr << "Error: Input xcorr size (" << xcorrVec.size() 266 | << ") does not match threshold size (" << thresholdVec.size() << ")" << std::endl; 267 | return 1; 268 | } 269 | if (xcorrVec.empty()) { 270 | std::cerr << "Error: Input data vectors are empty." << std::endl; 271 | return 1; 272 | } 273 | 274 | int numSamples = xcorrVec.size(); 275 | std::cout << "Number of samples to process: " << numSamples << std::endl; 276 | 277 | // --- Stream Preparation --- 278 | hls::stream xcorrStream("xcorrStream"); 279 | hls::stream thresholdStream("thresholdStream"); 280 | hls::stream locationStream("locationStream"); 281 | 282 | std::cout << "Populating input streams..." << std::endl; 283 | for (int i = 0; i < numSamples; ++i) { 284 | // Convert double to fixed-point Data_t 285 | xcorrStream.write(static_cast(xcorrVec[i])); 286 | thresholdStream.write(static_cast(thresholdVec[i])); 287 | } 288 | std::cout << "Input streams populated." << std::endl; 289 | 290 | // --- Call the DUT (Device Under Test) --- 291 | std::cout << "Calling HLS peakPicker function..." << std::endl; 292 | peakPicker(xcorrStream, thresholdStream, locationStream, numSamples); 293 | std::cout << "HLS peakPicker function finished." << std::endl; 294 | 295 | // --- Collect Results --- 296 | std::vector actualLocsVec; 297 | std::cout << "Collecting results from output stream..." << std::endl; 298 | while (!locationStream.empty()) { 299 | actualLocsVec.push_back(locationStream.read()); 300 | } 301 | std::cout << "Collected " << actualLocsVec.size() << " peak locations." << std::endl; 302 | 303 | // --- Verification --- 304 | std::cout << "Comparing HLS results with reference..." << std::endl; 305 | bool match = true; 306 | int errorCount = 0; 307 | 308 | if (actualLocsVec.size() != refLocsVec.size()) { 309 | std::cerr << "Error: Mismatch in number of detected peaks!" << std::endl; 310 | std::cerr << " Expected: " << refLocsVec.size() << std::endl; 311 | std::cerr << " Actual: " << actualLocsVec.size() << std::endl; 312 | match = false; 313 | } else { 314 | std::cout << "Number of peaks matches reference (" << refLocsVec.size() << ")." << std::endl; 315 | for (size_t i = 0; i < refLocsVec.size(); ++i) { 316 | // Direct comparison for integer indices 317 | // Convert Index_t (ap_uint) to int for comparison if necessary 318 | int actualLoc = static_cast(actualLocsVec[i]); 319 | int refLoc = refLocsVec[i]; 320 | 321 | if (actualLoc != refLoc) { 322 | if (errorCount < 10) { // Print first few mismatches 323 | std::cerr << "Mismatch at index " << i << ": Expected = " << refLoc 324 | << ", Actual = " << actualLoc << std::endl; 325 | } 326 | match = false; 327 | errorCount++; 328 | } 329 | } 330 | if (errorCount > 0) { 331 | std::cerr << "Total mismatches found: " << errorCount << std::endl; 332 | } 333 | } 334 | 335 | // --- Report Results --- 336 | if (match) { 337 | std::cout << "--- Test PASSED ---" << std::endl; 338 | std::cout << "HLS implementation output matches the reference output." << std::endl; 339 | return 0; // Success 340 | } else { 341 | std::cout << "--- Test FAILED ---" << std::endl; 342 | std::cout << "HLS implementation output does NOT match the reference output." << std::endl; 343 | return 1; // Failure 344 | } 345 | } 346 | ``` 347 | 348 | --- 349 | 350 | **Description of Optimization Approaches and Impact:** 351 | 352 | 1. **`#pragma HLS PIPELINE II=1`:** Applied to the `main_loop`. This is the most critical optimization for throughput. It instructs HLS to attempt initiating a new loop iteration every clock cycle. This significantly increases the processing rate, allowing the design to handle high-speed streaming data. Without it, the loop latency would dictate the throughput, which would be much lower. 353 | 2. **`#pragma HLS ARRAY_PARTITION variable=... complete dim=1`:** Applied to `xcorrBuffer` and `thresholdBuffer`. Since the `PIPELINE` pragma requires concurrent access to multiple elements of these buffers within a single clock cycle (for shifting and comparison), partitioning them completely breaks them down into individual registers. This eliminates memory access bottlenecks and allows the `II=1` target to be met. For small arrays like these (size 11), complete partitioning is feasible and efficient in terms of register usage vs. the performance gain. If the window were much larger, other partitioning schemes (e.g., `cyclic`, `block`) or mapping to BRAM/URAM might be considered to balance resources. 354 | 3. **`#pragma HLS UNROLL`:** Applied to the small inner loops (`shift_loop`, `compare_loop`). This duplicates the loop body's hardware, allowing all operations within those loops to execute in parallel within a single iteration of the pipelined outer loop. This helps reduce the latency of the outer loop iteration and supports achieving `II=1`. 355 | 4. **`hls::stream` Interface:** Using streams enables efficient data transfer between HLS blocks and external interfaces on the FPGA. Streams naturally fit pipelined and dataflow architectures, preventing large intermediate buffers and allowing continuous data processing. 356 | 5. **Fixed-Point Arithmetic (`ap_fixed`)**: While primarily a functional requirement for bit-accuracy and hardware efficiency, using fixed-point types instead of floating-point significantly reduces resource usage (LUTs, FFs, DSPs) on the FPGA, making the design smaller, faster, and lower power. The choice of `W` and `I` directly impacts resource usage and numerical accuracy. 357 | 6. **Dataflow (`#pragma HLS DATAFLOW`)**: *Not explicitly used* in this specific `peakPicker` function because the internal structure is a single, tightly coupled loop. Dataflow is more beneficial when a function can be broken down into distinct sequential tasks (e.g., TaskA -> TaskB -> TaskC) that can operate concurrently on different sets of data, communicating via streams or FIFOs. If `peakPicker` were part of a larger chain of processing blocks within a top-level function, `DATAFLOW` would be applied at that higher level to pipeline the execution of `peakPicker` relative to the other blocks. 358 | 359 | **Impact Summary:** 360 | 361 | * **Throughput:** Significantly increased due to `PIPELINE II=1`. The design can process one input sample per clock cycle after the initial pipeline fill latency. 362 | * **Latency:** The latency to produce the *first* output peak location is roughly `WINDOW_LENGTH` cycles (to fill the buffer) plus the pipeline depth of the main loop. The latency between subsequent outputs depends on the peak spacing but can be as low as 1 cycle if peaks occur consecutively. 363 | * **Resource Usage:** 364 | * `ARRAY_PARTITION complete` increases register usage but avoids BRAM. 365 | * `PIPELINE` and `UNROLL` increase logic usage (LUTs/FFs) to achieve parallelism. 366 | * Using `ap_fixed` is much more resource-efficient than floating-point. The specific `W` and `I` values determine the exact resource cost. 367 | 368 | This implementation provides a high-throughput, functionally correct HLS version of the MATLAB peak picker, ready for synthesis and integration into an FPGA design. Remember to validate and potentially adjust the `Data_t` fixed-point type based on your specific signal characteristics. -------------------------------------------------------------------------------- /agent_log.txt: -------------------------------------------------------------------------------- 1 | 2025-04-06 14:59:08,864 [INFO] Prompt manager initialized with directory: /home/jielei/Projects/UTS/llm-fpga-design/prompts 2 | 2025-04-06 14:59:08,864 [INFO] Registered agent: code_generator 3 | 2025-04-06 14:59:08,864 [INFO] Registered agent: debug_assistant 4 | 2025-04-06 14:59:08,864 [INFO] Registered agent: makefile_generator 5 | 2025-04-06 14:59:08,864 [INFO] Registered agent: performance_optimizer 6 | 2025-04-06 14:59:08,864 [INFO] Registered agent: hls_builder 7 | 2025-04-06 14:59:08,864 [INFO] Registered agent: documentation_generator 8 | 2025-04-06 14:59:08,864 [INFO] Workflow defined with 9 steps 9 | 2025-04-06 14:59:08,864 [INFO] Starting workflow execution 10 | 2025-04-06 14:59:08,865 [INFO] Executing workflow step: generate_code 11 | 2025-04-06 14:59:08,865 [INFO] Running agent code_generator for step generate_code 12 | 2025-04-06 14:59:08,865 [INFO] Agent code_generator status: running 13 | 2025-04-06 14:59:08,865 [INFO] Running command: /home/jielei/Projects/UTS/llm-fpga-design/.venv/bin/python3 /home/jielei/Projects/UTS/llm-fpga-design/scripts/generate_hls_code.py --matlab_file algorithms/peakPicker.m algorithms/peakPicker_tb.m --prompt /home/jielei/Projects/UTS/llm-fpga-design/prompts/hls_generation.md --output_dir /home/jielei/Projects/UTS/llm-fpga-design/implementations --model gemini-2.5-pro-exp-03-25 14 | 2025-04-06 15:00:21,531 [INFO] Agent code_generator status: success 15 | 2025-04-06 15:00:21,532 [INFO] Executing workflow step: generate_makefile 16 | 2025-04-06 15:00:21,532 [INFO] Running agent makefile_generator for step generate_makefile 17 | 2025-04-06 15:00:21,532 [INFO] Agent makefile_generator status: running 18 | 2025-04-06 15:00:21,532 [INFO] Generating Makefile for peakPicker in /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker 19 | 2025-04-06 15:00:21,532 [INFO] Generated Makefile from template at /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/Makefile 20 | 2025-04-06 15:00:21,532 [INFO] Agent makefile_generator status: success 21 | 2025-04-06 15:00:21,532 [INFO] Executing workflow step: build_csim 22 | 2025-04-06 15:00:21,532 [INFO] Running agent hls_builder for step build_csim 23 | 2025-04-06 15:00:21,532 [INFO] Agent hls_builder status: running 24 | 2025-04-06 15:00:21,533 [INFO] Running make in /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker: make csim 25 | 2025-04-06 15:00:28,586 [ERROR] Agent hls_builder: Make failed with return code 2 26 | 2025-04-06 15:00:28,586 [INFO] Agent hls_builder status: failed - Make failed with return code 2 27 | 2025-04-06 15:00:28,586 [INFO] Starting debug cycle 28 | 2025-04-06 15:00:28,586 [INFO] Executing workflow step: debug_errors 29 | 2025-04-06 15:00:28,586 [INFO] Running agent debug_assistant for step debug_errors 30 | 2025-04-06 15:00:28,586 [INFO] Agent debug_assistant status: running 31 | 2025-04-06 15:00:28,586 [INFO] Running command: /home/jielei/Projects/UTS/llm-fpga-design/.venv/bin/python3 /home/jielei/Projects/UTS/llm-fpga-design/scripts/debug_assistant.py --error_log /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/proj_peakPicker/solution1/csim/report/peakPicker_csim.log --source_file /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/peakPicker.hpp /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/peakPicker.cpp /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/peakPicker_tb.cpp --model gemini-2.5-pro-exp-03-25 32 | 2025-04-06 15:02:12,412 [INFO] Agent debug_assistant status: success 33 | 2025-04-06 15:02:12,412 [INFO] Executing workflow step: build_csim 34 | 2025-04-06 15:02:12,412 [INFO] Running agent hls_builder for step build_csim 35 | 2025-04-06 15:02:12,412 [INFO] Agent hls_builder status: running 36 | 2025-04-06 15:02:12,413 [INFO] Running make in /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker: make csim 37 | 2025-04-06 15:02:19,744 [INFO] Agent hls_builder status: success 38 | 2025-04-06 15:02:19,744 [INFO] Debug cycle completed successfully, continuing workflow 39 | 2025-04-06 15:02:19,744 [INFO] Executing workflow step: build_csynth 40 | 2025-04-06 15:02:19,744 [INFO] Running agent hls_builder for step build_csynth 41 | 2025-04-06 15:02:19,744 [INFO] Agent hls_builder status: running 42 | 2025-04-06 15:02:19,744 [INFO] Running make in /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker: make csynth 43 | 2025-04-06 15:02:34,838 [INFO] Agent hls_builder status: success 44 | 2025-04-06 15:02:34,839 [INFO] Executing workflow step: build_cosim 45 | 2025-04-06 15:02:34,839 [INFO] Running agent hls_builder for step build_cosim 46 | 2025-04-06 15:02:34,839 [INFO] Agent hls_builder status: running 47 | 2025-04-06 15:02:34,839 [INFO] Running make in /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker: make cosim 48 | 2025-04-06 15:03:00,392 [INFO] Agent hls_builder status: success 49 | 2025-04-06 15:03:00,392 [INFO] Executing workflow step: export_ip 50 | 2025-04-06 15:03:00,392 [INFO] Running agent hls_builder for step export_ip 51 | 2025-04-06 15:03:00,392 [INFO] Agent hls_builder status: running 52 | 2025-04-06 15:03:00,392 [INFO] Running make in /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker: make export_ip 53 | 2025-04-06 15:03:18,971 [INFO] Agent hls_builder status: success 54 | 2025-04-06 15:03:18,971 [INFO] Executing workflow step: build_impl 55 | 2025-04-06 15:03:18,971 [INFO] Running agent hls_builder for step build_impl 56 | 2025-04-06 15:03:18,972 [INFO] Agent hls_builder status: running 57 | 2025-04-06 15:03:18,972 [INFO] Running make in /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker: make impl 58 | 2025-04-06 15:06:56,841 [ERROR] Agent hls_builder: INFO: [Place 46-56] BUFG insertion identified 0 candidate nets. Inserted BUFG: 0, Replicated BUFG Driver: 0, Skipped due to Placement/Routing Conflicts: 0, Skipped due to Timing Degradation: 0, Skipped due to netlist editing failed: 0. 59 | Number of Failed Nets = 616 60 | (Failed Nets is the sum of unrouted and partially routed nets) 61 | Number of Failed Nets = 0 62 | (Failed Nets is the sum of unrouted and partially routed nets) 63 | 2025-04-06 15:06:56,842 [INFO] Agent hls_builder status: failed - INFO: [Place 46-56] BUFG insertion identified 0 candidate nets. Inserted BUFG: 0, Replicated BUFG Driver: 0, Skipped due to Placement/Routing Conflicts: 0, Skipped due to Timing Degradation: 0, Skipped due to netlist editing failed: 0. 64 | Number of Failed Nets = 616 65 | (Failed Nets is the sum of unrouted and partially routed nets) 66 | Number of Failed Nets = 0 67 | (Failed Nets is the sum of unrouted and partially routed nets) 68 | 2025-04-06 15:06:56,842 [INFO] Encountered error, running error handling: generate_documentation 69 | 2025-04-06 15:06:56,842 [INFO] Executing workflow step: generate_documentation 70 | 2025-04-06 15:06:56,842 [INFO] Running agent documentation_generator for step generate_documentation 71 | 2025-04-06 15:06:56,842 [INFO] Agent documentation_generator status: running 72 | 2025-04-06 15:06:56,842 [INFO] Analyzing performance reports... 73 | 2025-04-06 15:06:57,128 [INFO] Analyzing LLM responses for insights... 74 | 2025-04-06 15:06:57,128 [INFO] Analyzing code generation LLM response: /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/llm_response.md 75 | 2025-04-06 15:06:57,131 [INFO] Analyzing debug reports in: /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/debug_reports 76 | 2025-04-06 15:06:57,137 [INFO] Collecting workflow execution data... 77 | 2025-04-06 15:06:57,137 [INFO] Generating documentation... 78 | 2025-04-06 15:06:57,138 [INFO] Calling LLM with model gemini-2.5-pro-exp-03-25 to generate documentation... 79 | 2025-04-06 15:10:16,909 [INFO] Agent documentation_generator status: success 80 | 2025-04-06 15:10:16,910 [INFO] Workflow completed with status: success 81 | 2025-04-06 15:10:16,910 [INFO] Agent workflow completed successfully 82 | 2025-04-06 15:38:52,946 [INFO] Prompt manager initialized with directory: /home/jielei/Projects/UTS/llm-fpga-design/prompts 83 | 2025-04-06 15:38:52,946 [INFO] Registered agent: code_generator 84 | 2025-04-06 15:38:52,946 [INFO] Registered agent: debug_assistant 85 | 2025-04-06 15:38:52,947 [INFO] Registered agent: makefile_generator 86 | 2025-04-06 15:38:52,947 [INFO] Registered agent: performance_optimizer 87 | 2025-04-06 15:38:52,947 [INFO] Registered agent: hls_builder 88 | 2025-04-06 15:38:52,947 [INFO] Registered agent: documentation_generator 89 | 2025-04-06 15:38:52,947 [INFO] Agent documentation_generator status: running 90 | 2025-04-06 15:38:52,947 [INFO] Analyzing performance reports... 91 | 2025-04-06 15:38:53,187 [INFO] Analyzing LLM responses for insights... 92 | 2025-04-06 15:38:53,187 [INFO] Analyzing code generation LLM response: /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/llm_response.md 93 | 2025-04-06 15:38:53,190 [INFO] Analyzing debug reports in: /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/debug_reports 94 | 2025-04-06 15:38:53,196 [INFO] Collecting workflow execution data... 95 | 2025-04-06 15:38:53,196 [INFO] Generating documentation... 96 | 2025-04-06 15:38:53,197 [INFO] Calling LLM with model gemini-2.5-pro-exp-03-25 to generate documentation... 97 | 2025-04-06 15:41:50,339 [INFO] Agent documentation_generator status: success 98 | 2025-04-06 19:26:35,235 [INFO] Prompt manager initialized with directory: /home/jielei/Projects/UTS/llm-fpga-design/prompts 99 | 2025-04-06 19:26:35,235 [INFO] Registered agent: code_generator 100 | 2025-04-06 19:26:35,236 [INFO] Registered agent: debug_assistant 101 | 2025-04-06 19:26:35,236 [INFO] Registered agent: makefile_generator 102 | 2025-04-06 19:26:35,236 [INFO] Registered agent: performance_optimizer 103 | 2025-04-06 19:26:35,236 [INFO] Registered agent: hls_builder 104 | 2025-04-06 19:26:35,236 [INFO] Registered agent: documentation_generator 105 | 2025-04-06 19:26:35,236 [INFO] Agent documentation_generator status: running 106 | 2025-04-06 19:26:35,236 [INFO] Analyzing performance reports... 107 | 2025-04-06 19:26:35,498 [INFO] Generated performance report at /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/performance_metrics.md 108 | 2025-04-06 19:26:35,498 [INFO] Analyzing LLM responses for insights... 109 | 2025-04-06 19:26:35,498 [INFO] Analyzing code generation LLM response: /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/llm_response.md 110 | 2025-04-06 19:26:35,501 [INFO] Analyzing debug reports in: /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/debug_reports 111 | 2025-04-06 19:26:35,507 [INFO] Collecting workflow execution data... 112 | 2025-04-06 19:26:35,508 [INFO] Generating documentation... 113 | 2025-04-06 19:26:35,508 [INFO] Calling LLM with model gemini-2.5-pro-exp-03-25 to generate documentation... 114 | 2025-04-06 19:29:54,444 [INFO] Agent documentation_generator status: success 115 | 2025-04-06 20:13:46,400 [INFO] Prompt manager initialized with directory: /home/jielei/Projects/UTS/llm-fpga-design/prompts 116 | 2025-04-06 20:13:46,400 [INFO] Registered agent: code_generator 117 | 2025-04-06 20:13:46,400 [INFO] Registered agent: debug_assistant 118 | 2025-04-06 20:13:46,400 [INFO] Registered agent: makefile_generator 119 | 2025-04-06 20:13:46,400 [INFO] Registered agent: performance_optimizer 120 | 2025-04-06 20:13:46,400 [INFO] Registered agent: hls_builder 121 | 2025-04-06 20:13:46,400 [INFO] Registered agent: documentation_generator 122 | 2025-04-06 20:13:46,400 [INFO] Agent documentation_generator status: running 123 | 2025-04-06 20:13:46,400 [INFO] Analyzing performance reports... 124 | 2025-04-06 20:13:46,635 [ERROR] Error generating performance report: Unknown format code 'f' for object of type 'str' 125 | 2025-04-06 20:13:46,635 [INFO] Analyzing LLM responses for insights... 126 | 2025-04-06 20:13:46,635 [INFO] Analyzing code generation LLM response: /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/llm_response.md 127 | 2025-04-06 20:13:46,638 [INFO] Analyzing debug reports in: /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/debug_reports 128 | 2025-04-06 20:13:46,644 [INFO] Collecting workflow execution data... 129 | 2025-04-06 20:13:46,644 [INFO] Generating documentation... 130 | 2025-04-06 20:13:46,645 [INFO] Calling LLM with model gemini-2.5-pro-exp-03-25 to generate documentation... 131 | 2025-04-06 20:22:38,283 [INFO] Prompt manager initialized with directory: /home/jielei/Projects/UTS/llm-fpga-design/prompts 132 | 2025-04-06 20:22:38,283 [INFO] Registered agent: code_generator 133 | 2025-04-06 20:22:38,283 [INFO] Registered agent: debug_assistant 134 | 2025-04-06 20:22:38,283 [INFO] Registered agent: makefile_generator 135 | 2025-04-06 20:22:38,283 [INFO] Registered agent: performance_optimizer 136 | 2025-04-06 20:22:38,283 [INFO] Registered agent: hls_builder 137 | 2025-04-06 20:22:38,283 [INFO] Registered agent: documentation_generator 138 | 2025-04-06 20:22:38,284 [INFO] Agent documentation_generator status: running 139 | 2025-04-06 20:22:38,284 [INFO] Analyzing performance reports... 140 | 2025-04-06 20:22:38,547 [INFO] Generated performance report at /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/performance_metrics.md 141 | 2025-04-06 20:22:38,547 [INFO] Analyzing LLM responses for insights... 142 | 2025-04-06 20:22:38,547 [INFO] Analyzing code generation LLM response: /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/llm_response.md 143 | 2025-04-06 20:22:38,550 [INFO] Analyzing debug reports in: /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/debug_reports 144 | 2025-04-06 20:22:38,557 [INFO] Collecting workflow execution data... 145 | 2025-04-06 20:22:38,557 [INFO] Generating documentation... 146 | 2025-04-06 20:22:38,557 [INFO] Calling LLM with model gemini-2.5-pro-exp-03-25 to generate documentation... 147 | 2025-04-06 20:26:01,193 [INFO] Agent documentation_generator status: success 148 | 2025-04-06 20:55:53,924 [INFO] Prompt manager initialized with directory: /home/jielei/Projects/UTS/llm-fpga-design/prompts 149 | 2025-04-06 20:55:53,924 [INFO] Registered agent: code_generator 150 | 2025-04-06 20:55:53,924 [INFO] Registered agent: debug_assistant 151 | 2025-04-06 20:55:53,924 [INFO] Registered agent: makefile_generator 152 | 2025-04-06 20:55:53,924 [INFO] Registered agent: performance_optimizer 153 | 2025-04-06 20:55:53,924 [INFO] Registered agent: hls_builder 154 | 2025-04-06 20:55:53,924 [INFO] Registered agent: documentation_generator 155 | 2025-04-06 20:55:53,924 [INFO] Agent documentation_generator status: running 156 | 2025-04-06 20:55:53,924 [INFO] Analyzing performance reports... 157 | 2025-04-06 20:55:54,201 [INFO] Generated performance report at /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/performance_metrics.md 158 | 2025-04-06 20:55:54,201 [INFO] Analyzing LLM responses for insights... 159 | 2025-04-06 20:55:54,201 [INFO] Analyzing code generation LLM response: /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/llm_response.md 160 | 2025-04-06 20:55:54,204 [INFO] Analyzing debug reports in: /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/debug_reports 161 | 2025-04-06 20:55:54,210 [INFO] Collecting workflow execution data... 162 | 2025-04-06 20:55:54,210 [INFO] Generating documentation... 163 | 2025-04-06 20:55:54,211 [INFO] Calling LLM with model gemini-2.5-pro-exp-03-25 to generate documentation... 164 | 2025-04-06 20:59:21,513 [INFO] Agent documentation_generator status: success 165 | 2025-04-06 21:21:18,615 [INFO] Prompt manager initialized with directory: /home/jielei/Projects/UTS/llm-fpga-design/prompts 166 | 2025-04-06 21:21:18,615 [INFO] Registered agent: code_generator 167 | 2025-04-06 21:21:18,615 [INFO] Registered agent: debug_assistant 168 | 2025-04-06 21:21:18,615 [INFO] Registered agent: makefile_generator 169 | 2025-04-06 21:21:18,616 [INFO] Registered agent: performance_optimizer 170 | 2025-04-06 21:21:18,616 [INFO] Registered agent: hls_builder 171 | 2025-04-06 21:21:18,616 [INFO] Registered agent: documentation_generator 172 | 2025-04-06 21:21:18,617 [INFO] Agent documentation_generator status: running 173 | 2025-04-06 21:21:18,617 [INFO] Analyzing performance reports... 174 | 2025-04-06 21:21:18,895 [INFO] Generated performance report at /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/performance_metrics.md 175 | 2025-04-06 21:21:18,895 [INFO] Analyzing LLM responses for insights... 176 | 2025-04-06 21:21:18,895 [INFO] Analyzing code generation LLM response: /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/llm_response.md 177 | 2025-04-06 21:21:18,898 [INFO] Analyzing debug reports in: /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/debug_reports 178 | 2025-04-06 21:21:18,904 [INFO] Collecting workflow execution data... 179 | 2025-04-06 21:21:18,904 [INFO] Generating documentation... 180 | 2025-04-06 21:21:18,905 [INFO] Calling LLM with model gemini-2.5-pro-exp-03-25 to generate documentation... 181 | 2025-04-06 21:24:27,350 [INFO] Agent documentation_generator status: success 182 | 2025-04-06 21:56:03,569 [INFO] Prompt manager initialized with directory: /home/jielei/Projects/UTS/llm-fpga-design/prompts 183 | 2025-04-06 21:56:03,570 [INFO] Registered agent: code_generator 184 | 2025-04-06 21:56:03,570 [INFO] Registered agent: debug_assistant 185 | 2025-04-06 21:56:03,570 [INFO] Registered agent: makefile_generator 186 | 2025-04-06 21:56:03,570 [INFO] Registered agent: performance_optimizer 187 | 2025-04-06 21:56:03,570 [INFO] Registered agent: hls_builder 188 | 2025-04-06 21:56:03,570 [INFO] Registered agent: documentation_generator 189 | 2025-04-06 21:56:03,570 [INFO] Agent documentation_generator status: running 190 | 2025-04-06 21:56:03,570 [INFO] Analyzing performance reports... 191 | 2025-04-06 21:56:03,843 [INFO] Generated performance report at /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/performance_metrics.md 192 | 2025-04-06 21:56:03,843 [INFO] Analyzing LLM responses for insights... 193 | 2025-04-06 21:56:03,843 [INFO] Analyzing code generation LLM response: /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/llm_response.md 194 | 2025-04-06 21:56:03,846 [INFO] Analyzing debug reports in: /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/debug_reports 195 | 2025-04-06 21:56:03,852 [INFO] Collecting workflow execution data... 196 | 2025-04-06 21:56:03,852 [INFO] Generating documentation... 197 | 2025-04-06 21:56:03,853 [INFO] Calling LLM with model gemini-2.5-pro-exp-03-25 to generate documentation... 198 | 2025-04-06 21:59:23,375 [INFO] Agent documentation_generator status: success 199 | 2025-04-06 22:18:02,921 [INFO] Prompt manager initialized with directory: /home/jielei/Projects/UTS/llm-fpga-design/prompts 200 | 2025-04-06 22:18:02,921 [INFO] Registered agent: code_generator 201 | 2025-04-06 22:18:02,922 [INFO] Registered agent: debug_assistant 202 | 2025-04-06 22:18:02,922 [INFO] Registered agent: makefile_generator 203 | 2025-04-06 22:18:02,922 [INFO] Registered agent: performance_optimizer 204 | 2025-04-06 22:18:02,922 [INFO] Registered agent: hls_builder 205 | 2025-04-06 22:18:02,922 [INFO] Registered agent: documentation_generator 206 | 2025-04-06 22:18:02,922 [INFO] Agent documentation_generator status: running 207 | 2025-04-06 22:18:02,922 [INFO] Analyzing performance reports... 208 | 2025-04-06 22:18:03,197 [INFO] Generated performance report at /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/performance_metrics.md 209 | 2025-04-06 22:18:03,197 [INFO] Analyzing LLM responses for insights... 210 | 2025-04-06 22:18:03,198 [INFO] Analyzing code generation LLM response: /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/llm_response.md 211 | 2025-04-06 22:18:03,200 [INFO] Analyzing debug reports in: /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/debug_reports 212 | 2025-04-06 22:18:03,207 [INFO] Collecting workflow execution data... 213 | 2025-04-06 22:18:03,207 [INFO] Generating documentation... 214 | 2025-04-06 22:18:03,207 [INFO] Calling LLM with model gemini-2.5-pro-exp-03-25 to generate documentation... 215 | 2025-04-06 22:21:23,550 [INFO] Agent documentation_generator status: success 216 | --------------------------------------------------------------------------------