├── data
    └── locations_3_ref.txt
├── example
    ├── peakLocs_out.txt
    ├── locations_3_ref.txt
    ├── csynth.tcl
    ├── peakPicker_tb.m
    ├── peakPicker.hpp
    ├── peakPicker.m
    ├── peakPicker.cpp.backup
    ├── peakPicker.cpp
    ├── peakPicker_optimized.cpp
    ├── peakPicker_tb.cpp
    ├── Makefile
    └── vitis_hls.log
├── implementations
    └── peakPicker
    │   ├── csim.tcl
    │   ├── cosim.tcl
    │   ├── csynth.tcl
    │   ├── impl.tcl
    │   ├── export.tcl
    │   ├── performance_metrics.md
    │   ├── debug_reports
    │       └── peakPicker_debug_data_20250406_150212.json
    │   ├── peakPicker.hpp
    │   ├── peakPicker.cpp
    │   ├── Makefile
    │   ├── peakPicker_tb.cpp
    │   └── llm_response.md
├── requirements.txt
├── prompt_feedback.json
├── .gitignore
├── prompts
    ├── hls_conversion.md
    ├── hls_debugging.md
    ├── hls_generation.md
    ├── performance_optimization.md
    ├── documentation_template.md
    ├── readme_generation.md
    └── paper_generation.md
├── algorithms
    ├── peakPicker_tb.m
    └── peakPicker.m
├── workflows
    └── default_workflow.json
├── scripts
    ├── validate_report_parsing.py
    ├── Makefile
    └── optimize_hls_code.py
├── CLAUDE.md
├── readme.md
└── agent_log.txt


/data/locations_3_ref.txt:
--------------------------------------------------------------------------------
1 | 4806
2 | 


--------------------------------------------------------------------------------
/example/peakLocs_out.txt:
--------------------------------------------------------------------------------
1 | 4806
2 | 


--------------------------------------------------------------------------------
/example/locations_3_ref.txt:
--------------------------------------------------------------------------------
1 | 4806
2 | 


--------------------------------------------------------------------------------
/implementations/peakPicker/csim.tcl:
--------------------------------------------------------------------------------
 1 | open_project proj_peakPicker
 2 | set_top peakPicker
 3 | add_files peakPicker.cpp
 4 | add_files -tb peakPicker_tb.cpp
 5 | add_files -tb ../../data/locations_3_ref.txt
 6 | add_files -tb ../../data/pssCorrMagSq_3_in.txt
 7 | add_files -tb ../../data/threshold_in.txt
 8 | open_solution solution1
 9 | set_part {xc7k410t-ffg900-2}
10 | csim_design
11 | exit
12 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | # Required packages for LLM-Aided FPGA Design Flow
 2 | 
 3 | # HTTP requests
 4 | requests>=2.31.0
 5 | 
 6 | # Environment variables
 7 | python-dotenv>=1.0.0
 8 | 
 9 | # LLM API clients
10 | openai>=1.3.0
11 | google-generativeai>=0.3.0
12 | anthropic>=0.5.0
13 | 
14 | # Data handling
15 | numpy>=1.24.0
16 | pandas>=2.0.0
17 | 
18 | # Visualization (optional)
19 | matplotlib>=3.7.0
20 | 
21 | # Utility
22 | tqdm>=4.65.0
23 | 


--------------------------------------------------------------------------------
/example/csynth.tcl:
--------------------------------------------------------------------------------
 1 | open_project proj_peakPicker
 2 | set_top peakPicker
 3 | add_files peakPicker.cpp
 4 | add_files -tb peakPicker_tb.cpp
 5 | add_files -tb ./locations_3_ref.txt
 6 | add_files -tb ./peakLocs_out.txt
 7 | add_files -tb ./pssCorrMagSq_3_in.txt
 8 | add_files -tb ./threshold_in.txt
 9 | open_solution solution1
10 | set_part {xc7k410t-ffg900-2}
11 | create_clock -period 3.90 -name default
12 | set_clock_uncertainty 12.5%
13 | csynth_design
14 | exit
15 | 


--------------------------------------------------------------------------------
/implementations/peakPicker/cosim.tcl:
--------------------------------------------------------------------------------
 1 | open_project proj_peakPicker
 2 | set_top peakPicker
 3 | add_files peakPicker.cpp
 4 | add_files -tb peakPicker_tb.cpp
 5 | add_files -tb ../../data/locations_3_ref.txt
 6 | add_files -tb ../../data/pssCorrMagSq_3_in.txt
 7 | add_files -tb ../../data/threshold_in.txt
 8 | open_solution solution1
 9 | set_part {xc7k410t-ffg900-2}
10 | create_clock -period 3.90 -name default
11 | set_clock_uncertainty 12.5%
12 | cosim_design
13 | exit
14 | 


--------------------------------------------------------------------------------
/implementations/peakPicker/csynth.tcl:
--------------------------------------------------------------------------------
 1 | open_project proj_peakPicker
 2 | set_top peakPicker
 3 | add_files peakPicker.cpp
 4 | add_files -tb peakPicker_tb.cpp
 5 | add_files -tb ../../data/locations_3_ref.txt
 6 | add_files -tb ../../data/pssCorrMagSq_3_in.txt
 7 | add_files -tb ../../data/threshold_in.txt
 8 | open_solution solution1
 9 | set_part {xc7k410t-ffg900-2}
10 | create_clock -period 3.90 -name default
11 | set_clock_uncertainty 12.5%
12 | csynth_design
13 | exit
14 | 


--------------------------------------------------------------------------------
/implementations/peakPicker/impl.tcl:
--------------------------------------------------------------------------------
 1 | open_project proj_peakPicker
 2 | set_top peakPicker
 3 | add_files peakPicker.cpp
 4 | add_files -tb peakPicker_tb.cpp
 5 | add_files -tb ../../data/locations_3_ref.txt
 6 | add_files -tb ../../data/pssCorrMagSq_3_in.txt
 7 | add_files -tb ../../data/threshold_in.txt
 8 | open_solution solution1
 9 | set_part {xc7k410t-ffg900-2}
10 | create_clock -period 3.90 -name default
11 | set_clock_uncertainty 12.5%
12 | export_design -flow impl
13 | exit
14 | 


--------------------------------------------------------------------------------
/implementations/peakPicker/export.tcl:
--------------------------------------------------------------------------------
 1 | open_project proj_peakPicker
 2 | set_top peakPicker
 3 | add_files peakPicker.cpp
 4 | add_files -tb peakPicker_tb.cpp
 5 | add_files -tb ../../data/locations_3_ref.txt
 6 | add_files -tb ../../data/pssCorrMagSq_3_in.txt
 7 | add_files -tb ../../data/threshold_in.txt
 8 | open_solution solution1
 9 | set_part {xc7k410t-ffg900-2}
10 | create_clock -period 3.90 -name default
11 | set_clock_uncertainty 12.5%
12 | export_design -format ip_catalog
13 | exit
14 | 


--------------------------------------------------------------------------------
/prompt_feedback.json:
--------------------------------------------------------------------------------
1 | {"prompt": "hls_generation", "timestamp": "2025-04-06T11:38:21.561719", "model": "gemini-2.5-pro-exp-03-25", "component": "peakPicker", "status": "success", "files_generated": 3, "file_types": ["peakPicker_tb.cpp", "peakPicker.hpp", "peakPicker.cpp"]}
2 | {"prompt": "hls_generation", "timestamp": "2025-04-06T15:00:21.333684", "model": "gemini-2.5-pro-exp-03-25", "component": "peakPicker", "status": "success", "files_generated": 3, "file_types": ["peakPicker.hpp", "peakPicker.cpp", "peakPicker_tb.cpp"]}
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .venv/
 2 | .vscode/
 3 | implementations/peakPicker/proj_peakPicker/
 4 | # implementations/peakPicker/*.md
 5 | # implementations/peakPicker/*.py
 6 | implementations/peakPicker/*.log
 7 | implementations/peakPicker/*.bak
 8 | # prompts/paper_generation.md
 9 | # prompts/readme_generation.md
10 | # prompts/hls_generation.md
11 | # prompts/backups/
12 | scripts/__pycache__/
13 | # scripts/agent_cli.py
14 | # scripts/agent_framework.py
15 | # scripts/generate_documentation.py
16 | record/
17 | example/proj_peakPicker
18 | # workflows/
19 | /*_log.txt
20 | 


--------------------------------------------------------------------------------
/prompts/hls_conversion.md:
--------------------------------------------------------------------------------
 1 | # Copilot Instructions for Peak Picker Implementation
 2 | 
 3 | ## Project Context
 4 | This project implements a critical component of a 5G NR SSB detection application. 
 5 | The peak picker algorithm identifies SSB signals by locating peaks where the 
 6 | magnitude squared of the PSS correlation (`xcorr`) exceeds a predefined threshold.
 7 | 
 8 | ## Task Description
 9 | Your task is to translate the MATLAB peak picker algorithm into efficient HLS C++ 
10 | code while preserving exact functionality. The implementation should be optimized 
11 | for FPGA deployment using Xilinx HLS directives.
12 | 
13 | [Additional sections...]


--------------------------------------------------------------------------------
/implementations/peakPicker/performance_metrics.md:
--------------------------------------------------------------------------------
 1 | # Performance Metrics Report
 2 | 
 3 | ## Resource Utilization
 4 | 
 5 | | Implementation | LUT | FF | DSP | BRAM | URAM | SRL |
 6 | |---------------|-----|----|----|------|---------|-----|
 7 | | solution1 | 324 | 528 | 0 | 0 | 0 | 17 |
 8 | 
 9 | ## Timing
10 | 
11 | | Implementation | Target (ns) | Target (MHz) | Post-Synthesis (ns) | Post-Synthesis (MHz) | Post-Route (ns) | Post-Route (MHz) |
12 | |---------------|------------|-------------|-------------------|---------------------|----------------|----------------|
13 | | solution1 | 3.90 | 256.41 | 3.46 | 288.77 | 3.01 | 331.79 |
14 | 
15 | ## Latency
16 | 
17 | | Implementation | Min (cycles) | Max (cycles) | Average (cycles) | Throughput (samples/cycle) |
18 | |---------------|-------------|-------------|-----------------|-----------------------------|
19 | | solution1 | 6003 | 6003 | - | - |
20 | 
21 | 


--------------------------------------------------------------------------------
/algorithms/peakPicker_tb.m:
--------------------------------------------------------------------------------
 1 | % This is the testbench for the peakPicker function.
 2 | % It reads the input signal and threshold from files, calls the peakPicker
 3 | % function, writes the output to a file, and checks it against the reference.
 4 | %
 5 | % Author: Jie Lei
 6 | % Date: 03/30/2025
 7 | %
 8 | % Read the input cross correlation from a file.
 9 | xcorr = readmatrix('pssCorrMagSq_3_in.txt','Delimiter', 'tab');
10 | % Read the threshold from a file.
11 | threshold = readmatrix('threshold_in.txt','Delimiter', 'tab');
12 | 
13 | % Call the peakPicker function.
14 | [peakLocs] = peakPicker(xcorr, threshold);
15 | % Write the output to a file.
16 | writematrix(peakLocs, 'peakLocs_out.txt','Delimiter', 'tab');
17 | % Read the reference output from a file.
18 | refLocs = readmatrix('locations_3_ref.txt','Delimiter', 'tab');
19 | % Compare as column vectors: a row-vs-column shape difference between the computed and reference locations must not fail the test.
20 | if isequal(peakLocs(:), refLocs(:))
21 |     disp('Test passed: The output matches the reference output.');
22 | else
23 |     disp('Test failed: The output does not match the reference output.');
24 | end


--------------------------------------------------------------------------------
/example/peakPicker_tb.m:
--------------------------------------------------------------------------------
 1 | % This is the testbench for the peakPicker function.
 2 | % It reads the input signal and threshold from files, calls the peakPicker
 3 | % function, writes the output to a file, and checks it against the reference.
 4 | %
 5 | % Author: Jie Lei
 6 | % Date: 03/30/2025
 7 | %
 8 | % Read the input cross correlation from a file.
 9 | xcorr = readmatrix('pssCorrMagSq_3_in.txt','Delimiter', 'tab');
10 | % Read the threshold from a file.
11 | threshold = readmatrix('threshold_in.txt','Delimiter', 'tab');
12 | 
13 | % Call the peakPicker function.
14 | [peakLocs] = peakPicker(xcorr, threshold);
15 | % Write the output to a file.
16 | writematrix(peakLocs, 'peakLocs_out.txt','Delimiter', 'tab');
17 | % Read the reference output from a file.
18 | refLocs = readmatrix('locations_3_ref.txt','Delimiter', 'tab');
19 | % Compare as column vectors: a row-vs-column shape difference between the computed and reference locations must not fail the test.
20 | if isequal(peakLocs(:), refLocs(:))
21 |     disp('Test passed: The output matches the reference output.');
22 | else
23 |     disp('Test failed: The output does not match the reference output.');
24 | end


--------------------------------------------------------------------------------
/implementations/peakPicker/debug_reports/peakPicker_debug_data_20250406_150212.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "timestamp": "2025-04-06T15:02:12.379457",
 3 |   "files": [
 4 |     "/home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/peakPicker.hpp",
 5 |     "/home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/peakPicker.cpp",
 6 |     "/home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/peakPicker_tb.cpp"
 7 |   ],
 8 |   "error_analysis": {
 9 |     "primary_category": "unknown",
10 |     "all_categories": [],
11 |     "details": {}
12 |   },
13 |   "bug_summary": "the error log.",
14 |   "fix_summary": "the fix.\n    *   **If `csim_design` now passes (returns 0):** The original issue might have been misleadingly reported, or perhaps the indexing fix indirectly resolved the crash condition (unlikely but possible). Check if the test now reports PASSED or FAILED based on the corrected comparison logic.\n    *   **If `csim_design` still fails with `[SIM 100]`:** The crash is due to a different reason. Examine the `csim.log` file (usually found in the `<solution>/csim/build/` directory) for more detailed error messages (e.g., segmentation fault location, specific assertion failures). Further debugging would involve simplifying the input data, adding debug prints (`std::cout`) in the DUT (use sparingly and remove for synthesis), or checking the HLS tool environment.\n    *   Ensure the input files (`pssCorrMagSq_3_in.txt`, `threshold_in.txt`, `locations_3_ref.txt`) exist in the expected location relative to where the simulation is run (usually `<project>/<solution>/csim/build/`).",
15 |   "model_used": "gemini-2.5-pro-exp-03-25"
16 | }


--------------------------------------------------------------------------------
/example/peakPicker.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef PEAKPICKER_HPP
 2 | #define PEAKPICKER_HPP
 3 | 
 4 | #ifdef __SYNTHESIS__
 5 | #include <hls_stream.h>
 6 | #include <ap_fixed.h>
 7 | #include <ap_int.h>
 8 | 
 9 | // Synthesis build: fixed-point / arbitrary-width data type definitions
10 | typedef ap_fixed<24, 6> data_t;      // Reduced precision: 24-bit, 6 integer bits
11 | typedef ap_uint<16> index_t;         // 16-bit unsigned integer for indices
12 | typedef ap_uint<8> count_t;          // 8-bit counter for small counts
13 | #else
14 | // Non-synthesis build (e.g. C simulation / testbench) - use standard C++ types
15 | typedef double data_t;               // double here vs. ap_fixed<24, 6> above: this build does not model fixed-point quantization
16 | typedef unsigned short index_t;      // Stands in for the 16-bit unsigned index type
17 | typedef unsigned char count_t;       // Stands in for the 8-bit counter type
18 | #endif
19 | 
20 | // Algorithm parameters
21 | constexpr int WINDOW_LENGTH = 11;
22 | constexpr int MIDDLE_LOCATION = WINDOW_LENGTH / 2;  // 5
23 | constexpr int MAX_INPUT_SIZE = 6001;
24 | constexpr int MAX_PEAKS = 100;  // Conservative estimate for maximum peaks
25 | 
26 | #ifdef __SYNTHESIS__
27 | // Streaming-interface overload of peakPicker; declared only in synthesis builds
28 | void peakPicker(
29 |     hls::stream<data_t>& xcorr_stream,
30 |     hls::stream<data_t>& threshold_stream,
31 |     index_t input_length,
32 |     hls::stream<index_t>& locations_stream,
33 |     index_t* num_peaks
34 | );
35 | #endif
36 | 
37 | // Array-based wrapper; takes the same parameter list as the array-based peakPicker below
38 | void peakPicker_wrapper(
39 |     data_t xcorr[MAX_INPUT_SIZE],
40 |     data_t threshold[MAX_INPUT_SIZE], 
41 |     index_t input_length,
42 |     index_t locations[MAX_PEAKS],
43 |     index_t* num_peaks
44 | );
45 | 
46 | // Array-based peakPicker declaration (visible in both synthesis and non-synthesis builds)
47 | void peakPicker(
48 |     data_t xcorr[MAX_INPUT_SIZE],
49 |     data_t threshold[MAX_INPUT_SIZE], 
50 |     index_t input_length,
51 |     index_t locations[MAX_PEAKS],
52 |     index_t* num_peaks
53 | );
54 | 
55 | #endif // PEAKPICKER_HPP


--------------------------------------------------------------------------------
/implementations/peakPicker/peakPicker.hpp:
--------------------------------------------------------------------------------
 1 | /* AUTO-EDITED BY DEBUG ASSISTANT */
 2 | #ifndef PEAK_PICKER_HPP
 3 | #define PEAK_PICKER_HPP
 4 | 
 5 | #include <ap_fixed.h>
 6 | #include <hls_stream.h>
 7 | #include <ap_int.h> // For integer types if needed for indices
 8 | 
 9 | //--------------------------------------------------------------------------
10 | // Constants and Parameters
11 | //--------------------------------------------------------------------------
12 | 
13 | // Fixed-point type configuration (Adjust W and I based on signal analysis)
14 | constexpr int DATA_W = 32; // Total width
15 | constexpr int DATA_I = 16; // Integer width (including sign bit)
16 | 
17 | // Sliding window configuration (Matches MATLAB reference)
18 | constexpr int WINDOW_LENGTH = 11;
19 | constexpr int MIDDLE_LOCATION = WINDOW_LENGTH / 2; // Index 5 (0-based)
20 | 
21 | // Index type configuration (Ensure it can hold the maximum sample index)
22 | constexpr int INDEX_W = 32; // Width for location indices
23 | 
24 | //--------------------------------------------------------------------------
25 | // Type Definitions
26 | //--------------------------------------------------------------------------
27 | 
28 | // Fixed-point type for input data (xcorr and threshold)
29 | typedef ap_fixed<DATA_W, DATA_I> Data_t;
30 | 
31 | // Type for output location indices
32 | // Using ap_uint for non-negative indices. Use ap_int if indices can be negative.
33 | // Or simply use 'int' if standard integer sizes are sufficient and synthesizable.
34 | typedef ap_uint<INDEX_W> Index_t;
35 | // typedef int Index_t; // Alternative if standard int is sufficient
36 | 
37 | //--------------------------------------------------------------------------
38 | // Function Declaration
39 | //--------------------------------------------------------------------------
40 | 
41 | /**
42 |  * @brief Finds peaks in a cross-correlation signal using a sliding window.
43 |  *
44 |  * @param xcorrStream     Input stream of cross-correlation magnitude squared values.
45 |  * @param thresholdStream Input stream of threshold values (corresponding to xcorr samples).
46 |  * @param locationStream  Output stream for detected peak location indices (0-based).
47 |  * @param numSamples      Total number of samples to process from the input streams.
48 |  */
49 | void peakPicker(
50 |     hls::stream<Data_t>& xcorrStream,
51 |     hls::stream<Data_t>& thresholdStream,
52 |     hls::stream<Index_t>& locationStream,
53 |     int numSamples
54 | );
55 | 
56 | #endif // PEAK_PICKER_HPP


--------------------------------------------------------------------------------
/prompts/hls_debugging.md:
--------------------------------------------------------------------------------
 1 | # HLS Code Debugging Assistant
 2 | 
 3 | ## Task Description
 4 | You are tasked with analyzing HLS C++ code that has encountered errors during compilation, simulation, or synthesis. You must identify the root causes of the errors and provide specific solutions.
 5 | 
 6 | ## Source Files
 7 | The following HLS C++ source files have been provided:
 8 | 
 9 | {{SOURCE_FILES}}
10 | 
11 | ## Error Log
12 | The following errors were encountered during the HLS process:
13 | 
14 | {{ERROR_LOG}}
15 | 
16 | ## Debugging Process
17 | 
18 | Please follow this structured approach to debug the code:
19 | 
20 | 1. **Error Analysis**
21 |    - Categorize errors (compilation, simulation, synthesis, etc.)
22 |    - Identify error patterns and relationships between multiple errors
23 |    - Determine if errors are syntax-related, interface-related, or algorithm-related
24 | 
25 | 2. **Root Cause Identification**
26 |    - Locate the specific code causing each error
27 |    - Analyze context surrounding the problematic code
28 |    - Identify patterns of misuse of HLS constructs or C++ language features
29 |    - Check for common HLS pitfalls:
30 |      - Unsupported C++ features in HLS
31 |      - Memory access pattern issues
32 |      - Data type incompatibilities
33 |      - Interface specification problems
34 |      - Pragma-related issues
35 | 
36 | 3. **Solution Development**
37 |    - Propose specific fixes for each identified issue
38 |    - Provide explanations for why the fixes will resolve the errors
39 |    - Include code snippets showing the corrections
40 |    - Address any potential side effects of the proposed changes
41 | 
42 | 4. **Verification Guidance**
43 |    - Suggest verification steps to ensure the fixes are correct
44 |    - Recommend additional tests if appropriate
45 |    - Provide guidance on preventing similar issues in the future
46 | 
47 | ## IMPORTANT: Response Format
48 | 1. First, provide your analysis of the issue
49 | 2. Then, clearly indicate the start of the corrected code with "### COMPLETE CORRECTED SOURCE CODE:"
50 | 3. Provide the ENTIRE corrected source code file in a single code block, not just the changes
51 | 4. If you have multiple files, provide each file in a separate code block
52 | 5. Use the following format for code blocks:
53 |   - For function code file
54 |     **File: `{component}.cpp`**
55 | 
56 |     ```cpp
57 |     // Your complete corrected code here
58 |     ```
59 |   - For header file
60 |     **File: `{component}.hpp`**
61 | 
62 |     ```cpp
63 |     // Your complete corrected code here
64 |     ```
65 |   - For test bench file
66 |     **File: `{component}_tb.cpp`**
67 |     
68 |     ```cpp
69 |     // Your complete corrected code here
70 |     ```
71 | ## BEST PRACTICES
72 | 
73 | - Successfully resolved debugging issues on 2025-04-06
74 | 
75 | - Successfully resolved debugging issues on 2025-04-06
76 | 
77 | - Successfully resolved debugging issues on 2025-04-06
78 | 
79 | - Successfully resolved debugging issues on 2025-04-06
80 | 


--------------------------------------------------------------------------------
/prompts/hls_generation.md:
--------------------------------------------------------------------------------
 1 | # Copilot Instructions for {component} Implementation
 2 | 
 3 | ## Project Context
 4 | 
 5 | This project implements a component for an FPGA-based signal processing application. The {component} algorithm needs to be translated from a reference implementation to efficient HLS C++ code for FPGA deployment.
 6 | 
 7 | ## Task Description
 8 | 
 9 | Your task is to translate the reference {component} algorithm into efficient HLS C++ code while preserving exact functionality. The implementation should be optimized for FPGA deployment using Xilinx HLS directives.
10 | 
11 | **Required Files:**
12 | 
13 | - `{component}.hpp`: Header file with type definitions, function declarations, and parameters
14 | - `{component}.cpp`: Implementation file with the core algorithm
15 | - `{component}_tb.cpp`: C++ testbench that validates the implementation against reference data
16 | 
17 | ## Input/Output Specifications
18 | 
19 | - **Inputs:**
20 |   - [To be specified based on {component} requirements]
21 | - **Outputs:**
22 |   - [To be specified based on {component} requirements]
23 | 
24 | ## Implementation Requirements
25 | 
26 | ### Functional Requirements
27 | 
28 | - Implement the `{component}()` function in HLS C++ with exactly the same behavior as the reference
29 | - Produce a bit-accurate implementation of the reference algorithm (results must match the reference within the specified error margins)
30 | - Document code thoroughly with comments explaining the algorithm and optimization decisions
31 | 
32 | ### Interface and Data Type Requirements
33 | 
34 | - Use `hls::stream` interfaces with appropriate buffer depths for streaming data
35 | - Implement fixed-point arithmetic with `ap_fixed<W,I>` (specify exact bit widths based on precision requirements)
36 | - Use `hls::complex<ap_fixed<W,I>>` for any complex number operations
37 | - Define all constant parameters in `{component}.hpp` using `#define` or `constexpr`
38 | - Create descriptive type aliases with `typedef` or `using` statements
39 | 
40 | ### File I/O and Validation (testbench file `{component}_tb.cpp` only)
41 | 
42 | - Read input data from `{component}_in.txt` (one value per line)
43 | - Read threshold values from `threshold_in.txt` (one value per line)
44 | - Read reference output data from `{component}_ref.txt` (one value per line)
45 | - Implement robust error checking for file operations with clear error messages
46 | - Calculate and report both absolute and relative errors between your implementation and reference values
47 | 
48 | ### Performance Optimization
49 | 
50 | - Apply `#pragma HLS PIPELINE II=1` to performance-critical loops
51 | - Use `#pragma HLS DATAFLOW` for task-level pipelining
52 | - Implement arrays exceeding 64 elements using dual-port block RAM
53 | - Apply memory partitioning to arrays requiring parallel access
54 | - Configure optimization directives based on throughput requirements
55 | - Balance resource usage and performance based on target FPGA constraints
56 | 
57 | ### Coding Style
58 | 
59 | - Define all constants, types, and function declarations in `{component}.hpp`
60 | - Implement core algorithm in `{component}.cpp` with consistent style
61 | - Follow naming convention: camelCase for variables, PascalCase for types
62 | - Use self-documenting identifier names that clearly reflect their purpose
63 | 
64 | ## Deliverables
65 | 
66 | - Fully commented HLS C++ implementation files
67 | - Comprehensive testbench demonstrating functional correctness
68 | - Description of optimization approaches and their impact on performance
69 | 


--------------------------------------------------------------------------------
/algorithms/peakPicker.m:
--------------------------------------------------------------------------------
 1 | % Modified from the original peakPicker.m file
 2 | % to use a hardware-friendly implementation of peak finder
 3 | % This function is used to find the locations of peaks in a cross-correlation
 4 | % signal. It takes the cross-correlation signal and a per-sample threshold
 5 | % as inputs, and returns the locations of the detected peaks.
 6 | % The function uses a sliding window approach (fixed length of 11 samples) to
 7 | % check for local maxima. The middle sample of the window is compared to the
 8 | % other samples in the window; if no sample exceeds it and it is greater than
 9 | % its threshold, it is considered a peak. Candidates are only evaluated
10 | % once the window has been filled with window_length samples.
11 | % The function is designed to be hardware-friendly, meaning it is optimized
12 | % for implementation on hardware platforms such as FPGAs or ASICs. It uses
13 | % a simple and efficient algorithm to find the peaks, avoiding complex
14 | % operations that may not be suitable for hardware implementation.
15 | % The function is written in MATLAB and can be used in various applications
16 | % such as signal processing, communications, and data analysis.
17 | %
18 | % The function takes the following inputs:
19 | % - xcorr: The cross-correlation signal. Samples are read with linear
20 | %   indexing (xcorr(index)), so the input is effectively processed as a
21 | %   single sequence of num_samples values.
22 | % - threshold: The threshold values for peak detection, which is a vector
23 | %   of size (num_samples, 1). The middle sample of the window must exceed
24 | %   its corresponding threshold to be considered a peak.
25 | % Internal parameter (not an input):
26 | % - window_length: The length of the sliding window used for peak
27 | %   detection, fixed to 11 samples inside the function.
28 | %
29 | % The function returns the following output:
30 | % - locations: A vector containing the indices of the detected peaks in
31 | %   the cross-correlation signal. The indices are relative to the input
32 | %   signal and indicate the locations of the detected peaks.
33 | %
34 | % Author: Jie Lei
35 | % Date: 03/31/2025
36 | % University of Technology Sydney
37 | 
38 | function [locations]= peakPicker(xcorr,threshold)
39 | % Copyright 2021-2023 The MathWorks, Inc.
40 | 
41 |     locations=[]; % Detected peak locations; grows by one entry per peak found
42 |     window_length = 11; % Length of the sliding window
43 |     middle_location=floor(window_length/2); % Offset of the middle sample (5 for window_length = 11)
44 |     xcorrBuffer = zeros(window_length, 1); % Preallocate buffer for current window
45 |     thresholdBuffer = zeros(window_length, 1); % Preallocate buffer for threshold
46 |     
47 |     for index=1:length(xcorr)-window_length+1 % Note: the last window_length-1 samples are never shifted in
48 |         xcorrBuffer(2:end) = xcorrBuffer(1:end-1); % Shift buffer
49 |         xcorrBuffer(1) = xcorr(index); % Add new sample to buffer
50 |         thresholdBuffer(2:end) = thresholdBuffer(1:end-1); % Shift threshold buffer
51 |         thresholdBuffer(1) = threshold(index); % Add new threshold to buffer
52 |         if (index >= window_length) % Only evaluate once the buffer holds window_length input samples
53 |             candidate_location = index - middle_location; % Input index of the sample now at the middle of the buffer
54 |             % Hardware friendly implementation of peak finder
55 |             MidSample = xcorrBuffer(middle_location+1,:);
56 |             CompareOut = xcorrBuffer - MidSample; % this is a vector
57 |             % if no value in the result is positive (no sample in the window exceeds
58 |             % the middle sample) and the middle sample is greater than its threshold, it is a local max
59 |             if all(CompareOut <= 0) && (MidSample > thresholdBuffer(middle_location+1))
60 |                 locations = [locations candidate_location]; %#ok
61 |             end
62 |         end
63 |     end
64 | end
65 | 


--------------------------------------------------------------------------------
/example/peakPicker.m:
--------------------------------------------------------------------------------
 1 | % Modified from the original peakPicker.m file
 2 | % to use a hardware-friendly implementation of peak finder
 3 | % This function is used to find the locations of peaks in a cross-correlation
 4 | % signal. It takes the cross-correlation signal and a per-sample threshold
 5 | % as inputs, and returns the locations of the detected peaks.
 6 | % The function uses a sliding window approach (fixed length of 11 samples) to
 7 | % check for local maxima. The middle sample of the window is compared to the
 8 | % other samples in the window; if no sample exceeds it and it is greater than
 9 | % its threshold, it is considered a peak. Candidates are only evaluated
10 | % once the window has been filled with window_length samples.
11 | % The function is designed to be hardware-friendly, meaning it is optimized
12 | % for implementation on hardware platforms such as FPGAs or ASICs. It uses
13 | % a simple and efficient algorithm to find the peaks, avoiding complex
14 | % operations that may not be suitable for hardware implementation.
15 | % The function is written in MATLAB and can be used in various applications
16 | % such as signal processing, communications, and data analysis.
17 | %
18 | % The function takes the following inputs:
19 | % - xcorr: The cross-correlation signal. Samples are read with linear
20 | %   indexing (xcorr(index)), so the input is effectively processed as a
21 | %   single sequence of num_samples values.
22 | % - threshold: The threshold values for peak detection, which is a vector
23 | %   of size (num_samples, 1). The middle sample of the window must exceed
24 | %   its corresponding threshold to be considered a peak.
25 | % Internal parameter (not an input):
26 | % - window_length: The length of the sliding window used for peak
27 | %   detection, fixed to 11 samples inside the function.
28 | %
29 | % The function returns the following output:
30 | % - locations: A vector containing the indices of the detected peaks in
31 | %   the cross-correlation signal. The indices are relative to the input
32 | %   signal and indicate the locations of the detected peaks.
33 | %
34 | % Author: Jie Lei
35 | % Date: 03/31/2025
36 | % University of Technology Sydney
37 | 
38 | function [locations]= peakPicker(xcorr,threshold)
39 | % Copyright 2021-2023 The MathWorks, Inc.
40 | % Returns the 1-based indices of window-centre samples that are local maxima above threshold.
41 |     window_length   = 11;                        % number of samples in the sliding window
42 |     middle_location = floor(window_length/2);    % distance from newest sample to window centre
43 |     centre_row      = middle_location + 1;       % buffer row holding the centre sample
44 |     xcorrBuffer     = zeros(window_length, 1);   % row 1 = newest sample, last row = oldest
45 |     thresholdBuffer = zeros(window_length, 1);   % thresholds, row-aligned with xcorrBuffer
46 |     locations       = [];                        % grows by one entry per detected peak
47 |     for index = 1:(length(xcorr) - window_length + 1)
48 |         % Age every stored sample by one row, then insert the new pair at row 1.
49 |         xcorrBuffer(2:end)     = xcorrBuffer(1:end-1);
50 |         thresholdBuffer(2:end) = thresholdBuffer(1:end-1);
51 |         xcorrBuffer(1)     = xcorr(index);
52 |         thresholdBuffer(1) = threshold(index);
53 |         window_full = (index >= window_length);  % false while start-up zeros remain
54 |         if window_full
55 |             % Hardware-friendly local-maximum test: no window sample may exceed the
56 |             % centre sample (differences all <= 0, so ties are accepted).
57 |             MidSample  = xcorrBuffer(centre_row);
58 |             CompareOut = xcorrBuffer - MidSample;
59 |             if (MidSample > thresholdBuffer(centre_row)) && all(CompareOut <= 0)
60 |                 locations = [locations, index - middle_location]; %#ok<AGROW>
61 |             end
62 |         end
63 |     end
64 | end
65 | 


--------------------------------------------------------------------------------
/example/peakPicker.cpp.backup:
--------------------------------------------------------------------------------
 1 | #include "peakPicker.hpp"
 2 | 
 3 | #ifdef __SYNTHESIS__
 4 | #include <hls_stream.h>
 5 | #endif
 6 | 
 7 | void peakPicker(
 8 |     data_t xcorr[MAX_INPUT_SIZE],      // in:  samples to search, read one per main-loop iteration
 9 |     data_t threshold[MAX_INPUT_SIZE],  // in:  per-sample thresholds, indexed like xcorr
10 |     index_t input_length,              // in:  sample count that sets the main-loop bound
11 |     index_t locations[MAX_PEAKS],      // out: 1-based peak positions, filled from index 0
12 |     index_t* num_peaks                 // out: number of entries written to locations
13 | ) {
14 | #ifdef __SYNTHESIS__
15 |     // Synthesis-only interface: every array/pointer on its own m_axi bundle, scalars on AXI-Lite
16 |     #pragma HLS INTERFACE m_axi port=xcorr offset=slave bundle=gmem0
17 |     #pragma HLS INTERFACE m_axi port=threshold offset=slave bundle=gmem1
18 |     #pragma HLS INTERFACE m_axi port=locations offset=slave bundle=gmem2
19 |     #pragma HLS INTERFACE m_axi port=num_peaks offset=slave bundle=gmem3
20 |     #pragma HLS INTERFACE s_axilite port=input_length bundle=control
21 |     #pragma HLS INTERFACE s_axilite port=return bundle=control
22 | #endif
23 |     
24 |     // Window buffers used as shift registers: element 0 holds the newest sample
25 |     data_t xcorrBuffer[WINDOW_LENGTH];
26 |     data_t thresholdBuffer[WINDOW_LENGTH];
27 |     
28 |     // Zero the window. NOTE(review): the UNROLL below is the only pragma not guarded by __SYNTHESIS__
29 |     init_buffers: for (int i = 0; i < WINDOW_LENGTH; i++) {
30 |         #pragma HLS UNROLL
31 |         xcorrBuffer[i] = 0;
32 |         thresholdBuffer[i] = 0;
33 |     }
34 |     
35 |     index_t peak_count = 0;
36 |     // NOTE(review): if index_t is unsigned and input_length < WINDOW_LENGTH - 1, the bound below wraps around - confirm index_t
37 |     // Main processing loop - one new sample per iteration; the last WINDOW_LENGTH-1 samples are never read
38 |     main_loop: for (index_t index = 0; index < input_length - WINDOW_LENGTH + 1; index++) {
39 | #ifdef __SYNTHESIS__
40 |         #pragma HLS PIPELINE II=1
41 |         #pragma HLS LOOP_TRIPCOUNT min=5991 max=5991 avg=5991
42 | #endif
43 |         
44 |         // Shift from the oldest slot down so each value is copied before it is overwritten, then insert at 0
45 |         shift_xcorr: for (int i = WINDOW_LENGTH - 1; i > 0; i--) {
46 | #ifdef __SYNTHESIS__
47 |             #pragma HLS UNROLL
48 | #endif
49 |             xcorrBuffer[i] = xcorrBuffer[i-1];
50 |         }
51 |         xcorrBuffer[0] = xcorr[index];
52 |         
53 |         shift_threshold: for (int i = WINDOW_LENGTH - 1; i > 0; i--) {
54 | #ifdef __SYNTHESIS__
55 |             #pragma HLS UNROLL
56 | #endif
57 |             thresholdBuffer[i] = thresholdBuffer[i-1];
58 |         }
59 |         thresholdBuffer[0] = threshold[index];
60 |         
61 |         // Check for a peak only once WINDOW_LENGTH samples have been shifted in (index is 0-based)
62 |         if (index >= WINDOW_LENGTH - 1) {
63 |             index_t candidate_location = index - MIDDLE_LOCATION; // 0-based position of the window's centre sample
64 |             
65 |             // Centre sample and the threshold that was read together with it
66 |             data_t midSample = xcorrBuffer[MIDDLE_LOCATION];
67 |             data_t midThreshold = thresholdBuffer[MIDDLE_LOCATION];
68 |             
69 |             // The centre must be strictly greater than its threshold
70 |             bool above_threshold = (midSample > midThreshold);
71 |             
72 |             // Local-maximum test: only a strictly larger neighbour disqualifies the centre (ties are accepted)
73 |             bool is_peak = true;
74 |             peak_check: for (int i = 0; i < WINDOW_LENGTH; i++) {
75 | #ifdef __SYNTHESIS__
76 |                 #pragma HLS UNROLL
77 | #endif
78 |                 if (i != MIDDLE_LOCATION) {
79 |                     if (xcorrBuffer[i] > midSample) {
80 |                         is_peak = false;
81 |                     }
82 |                 }
83 |             }
84 |             
85 |             // Store the peak if both tests pass; peaks found after MAX_PEAKS entries are dropped
86 |             if (is_peak && above_threshold && peak_count < MAX_PEAKS) {
87 |                 locations[peak_count] = candidate_location + 1; // MATLAB 1-indexed
88 |                 peak_count++;
89 |             }
90 |         }
91 |     }
92 |     
93 |     *num_peaks = peak_count;
94 | }


--------------------------------------------------------------------------------
/prompts/performance_optimization.md:
--------------------------------------------------------------------------------
 1 | # HLS Performance Optimization Guidelines
 2 | 
 3 | ## Task Description
 4 | You are tasked with optimizing an existing HLS C++ implementation to improve performance, reduce resource utilization, or achieve better timing. Your goal is to maintain functional correctness while enhancing design metrics.
 5 | 
 6 | ## Source Files
 7 | The following HLS C++ source files are being provided:
 8 | 
 9 | {{SOURCE_FILES}}
10 | 
11 | ## Performance Metrics
12 | Current performance metrics of the design:
13 | 
14 | {{PERFORMANCE_METRICS}}
15 | 
16 | ## Optimization Goals
17 | - Primary goal: {{PRIMARY_GOAL}} (e.g., "Reduce latency by at least 30%")
18 | - Secondary goal: {{SECONDARY_GOAL}} (e.g., "Maintain or reduce resource utilization")
19 | 
20 | ## Optimization Strategy
21 | 
22 | Please follow this structured approach:
23 | 
24 | 1. **Design Analysis**
25 |    - Analyze the algorithm structure and computational patterns
26 |    - Identify performance bottlenecks in the current implementation
27 |    - Map data dependencies and memory access patterns
28 |    - Recognize rate-limiting operations or loops
29 | 
30 | 2. **Optimization Techniques**
31 |    
32 |    Consider the following optimization categories:
33 |    
34 |    **Loop Optimizations:**
35 |    - Pipeline loops to improve throughput (PIPELINE pragma)
36 |    - Unroll loops to exploit parallelism (UNROLL pragma)
37 |    - Merge loops to reduce overhead
38 |    - Partition loops to enable better scheduling
39 |    
40 |    **Memory Optimizations:**
41 |    - Array partitioning (ARRAY_PARTITION pragma)
42 |    - Memory reshaping for better access patterns
43 |    - Double buffering for overlapped computation
44 |    - Streaming interfaces for sequential data (hls::stream)
45 |    
46 |    **Data Type Optimizations:**
47 |    - Optimize bit widths using ap_fixed/ap_int
48 |    - Convert floating-point to fixed-point where appropriate
49 |    - Simplify complex operations with lookup tables or approximations
50 |    
51 |    **Function-Level Optimizations:**
52 |    - Inline small functions to reduce function call overhead
53 |    - Dataflow optimization for task-level pipelining
54 |    - Function parallelism with multiple instances
55 |    
56 |    **Interface Optimizations:**
57 |    - Optimize interface protocols (AXI4, AXI-Lite, AXI-Stream)
58 |    - Burst transfers for efficient data movement
59 |    - Register slicing for timing improvement
60 | 
61 | 3. **Implementation Plan**
62 |    - Prioritize optimizations based on impact vs. effort
63 |    - Plan incremental changes that can be verified individually
64 |    - Consider trade-offs between different metrics (latency vs. area)
65 | 
66 | ## Response Format
67 | 
68 | Please structure your response as follows:
69 | 
70 | ### 1. Design Analysis
71 | A summary of your analysis of the current implementation, identifying bottlenecks and opportunities.
72 | 
73 | ### 2. Recommended Optimizations
74 | For each file requiring changes:
75 | - The file name
76 | - Description of proposed optimizations
77 | - Code snippets showing the modifications with added pragmas or code changes
78 | - Expected impact of each optimization
79 | 
80 | ### 3. Implementation Priority
81 | A prioritized list of optimizations, explaining which should be implemented first.
82 | 
83 | ### 4. Expected Outcomes
84 | Predictions about the performance improvements that could be achieved.
85 | 
86 | ### 5. Verification Plan
87 | Suggestions for verifying that the optimizations maintain functional correctness.
88 | 
89 | ## Additional Guidelines
90 | - Focus on HLS-specific optimizations, not general C++ performance improvements
91 | - Explain the reasoning behind each optimization
92 | - Consider Xilinx/Intel FPGA architecture specifics when relevant
93 | - Indicate any potential risks or trade-offs for each optimization
94 | - When multiple approaches could work, explain the pros and cons of each
95 | 


--------------------------------------------------------------------------------
/workflows/default_workflow.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "generate_code": {
  3 |     "agent": "code_generator",
  4 |     "context": {
  5 |       "args": {
  6 |         "matlab_file": ["<MATLAB_FILE>"],
  7 |         "prompt": "<PROMPT_FILE>",
  8 |         "prompt_name": "hls_generation",
  9 |         "output_dir": "<OUTPUT_DIR>",
 10 |         "model": "gemini-2.5-pro-exp-03-25",
 11 |         "timeout": 600
 12 |       }
 13 |     },
 14 |     "update_context": {
 15 |       "component_dir": "output.stdout"
 16 |     },
 17 |     "next": "generate_makefile",
 18 |     "on_error": "stop"
 19 |   },
 20 |   "generate_makefile": {
 21 |     "agent": "makefile_generator",
 22 |     "context": {
 23 |       "work_dir": "<COMPONENT_DIR>",
 24 |       "component": "<COMPONENT>",
 25 |       "template_makefile": "<PROJECT_DIR>/scripts/Makefile"
 26 |     },
 27 |     "next": "build_csim",
 28 |     "on_error": "stop"
 29 |   },
 30 |   "build_csim": {
 31 |     "agent": "hls_builder",
 32 |     "context": {
 33 |       "work_dir": "<COMPONENT_DIR>",
 34 |       "target": "csim"
 35 |     },
 36 |     "next": "build_csynth",
 37 |     "on_error": "debug_errors"
 38 |   },
 39 |   "debug_errors": {
 40 |     "agent": "debug_assistant",
 41 |     "context": {
 42 |       "args": {
 43 |         "error_log": "<COMPONENT_DIR>/proj_<COMPONENT>/solution1/csim/report/<COMPONENT>_csim.log",
 44 |         "source_file": [
 45 |           "<COMPONENT_DIR>/<COMPONENT>.hpp",
 46 |           "<COMPONENT_DIR>/<COMPONENT>.cpp",
 47 |           "<COMPONENT_DIR>/<COMPONENT>_tb.cpp"
 48 |         ],
 49 |         "model": "gemini-2.5-pro-exp-03-25",
 50 |         "timeout": 300
 51 |       }
 52 |     },
 53 |     "next": "build_csim",
 54 |     "on_error": "build_csynth"
 55 |   },
 56 |   "build_csynth": {
 57 |     "agent": "hls_builder",
 58 |     "context": {
 59 |       "work_dir": "<COMPONENT_DIR>",
 60 |       "target": "csynth"
 61 |     },
 62 |     "next": {
 63 |       "context.get('optimize', False) == True": "optimize_code",
 64 |       "default": "build_cosim"
 65 |     },
 66 |     "on_error": "stop"
 67 |   },
 68 |   "optimize_code": {
 69 |     "agent": "performance_optimizer",
 70 |     "context": {
 71 |       "args": {
 72 |         "source_dir": "<COMPONENT_DIR>",
 73 |         "prompt_name": "performance_optimization",
 74 |         "model": "gemini-2.5-pro-exp-03-25",
 75 |         "primary_goal": "Reduce latency by at least 20%",
 76 |         "secondary_goal": "Maintain resource utilization"
 77 |       }
 78 |     },
 79 |     "next": "build_csynth_optimized",
 80 |     "on_error": "build_cosim"
 81 |   },
 82 |   "build_csynth_optimized": {
 83 |     "agent": "hls_builder",
 84 |     "context": {
 85 |       "work_dir": "<COMPONENT_DIR>",
 86 |       "target": "csynth"
 87 |     },
 88 |     "next": "build_cosim",
 89 |     "on_error": "build_cosim"
 90 |   },
 91 |   "build_cosim": {
 92 |     "agent": "hls_builder",
 93 |     "context": {
 94 |       "work_dir": "<COMPONENT_DIR>",
 95 |       "target": "cosim"
 96 |     },
 97 |     "next": "export_ip",
 98 |     "on_error": "stop"
 99 |   },
100 |   "export_ip": {
101 |     "agent": "hls_builder",
102 |     "context": {
103 |       "work_dir": "<COMPONENT_DIR>",
104 |       "target": "export_ip"
105 |     },
106 |     "next": "build_impl",
107 |     "on_error": "build_impl"
108 |   },
109 |   "build_impl": {
110 |     "agent": "hls_builder",
111 |     "context": {
112 |       "work_dir": "<COMPONENT_DIR>",
113 |       "target": "impl"
114 |     },
115 |     "next": "generate_documentation",
116 |     "on_error": "generate_documentation"
117 |   },
118 |   "generate_documentation": {
119 |     "agent": "documentation_generator",
120 |     "context": {
121 |       "component_dir": "<COMPONENT_DIR>",
122 |       "component": "<COMPONENT>",
123 |       "output_format": ["readme", "paper"],
124 |       "model": "gemini-2.5-pro-exp-03-25"
125 |     },
126 |     "on_error": "stop"
127 |   }
128 | }
129 | 


--------------------------------------------------------------------------------
/example/peakPicker.cpp:
--------------------------------------------------------------------------------
  1 | #include "peakPicker.hpp"
  2 | 
  3 | #ifdef __SYNTHESIS__
  4 | #include <hls_stream.h>
  5 | #endif
  6 | 
  7 | /**
  8 |  * @brief Sliding-window peak picker over memory-mapped arrays (pipelined, II=1 target).
  9 |  *
 10 |  * A sample is reported as a peak when no other sample in the WINDOW_LENGTH-wide
 11 |  * window centred on it is larger (ties are accepted) and the sample is strictly
 12 |  * greater than the threshold stored at the same position. The window is a pair of
 13 |  * shift registers that start out zero-filled; testing begins once WINDOW_LENGTH
 14 |  * samples have been shifted in.
 15 |  *
 16 |  * @param xcorr        Input samples; elements [0, input_length) are read in order.
 17 |  * @param threshold    Per-sample thresholds, indexed like xcorr.
 18 |  * @param input_length Number of samples to process.
 19 |  * @param locations    Out: 1-based positions of detected peaks, in ascending order.
 20 |  * @param num_peaks    Out: number of entries written to locations (at most MAX_PEAKS;
 21 |  *                     further peaks are silently dropped).
 22 |  *
 23 |  * The shift and compare steps are loops over WINDOW_LENGTH / MIDDLE_LOCATION rather
 24 |  * than hand-unrolled statements, so changing the window size needs no code edits.
 25 |  */
 26 | void peakPicker_wrapper(
 27 |     data_t xcorr[MAX_INPUT_SIZE],
 28 |     data_t threshold[MAX_INPUT_SIZE], 
 29 |     index_t input_length,
 30 |     index_t locations[MAX_PEAKS],
 31 |     index_t* num_peaks
 32 | ) {
 33 | #ifdef __SYNTHESIS__
 34 |     // Memory-mapped data ports with AXI-Lite control.
 35 |     // NOTE(review): xcorr and threshold share bundle "gmem" and are both read every
 36 |     // iteration; confirm in the synthesis report that the loop still reaches II=1.
 37 |     #pragma HLS INTERFACE m_axi port=xcorr offset=slave bundle=gmem max_read_burst_length=256
 38 |     #pragma HLS INTERFACE m_axi port=threshold offset=slave bundle=gmem max_read_burst_length=256
 39 |     #pragma HLS INTERFACE m_axi port=locations offset=slave bundle=gmem_out max_write_burst_length=100
 40 |     #pragma HLS INTERFACE m_axi port=num_peaks offset=slave bundle=gmem_out
 41 |     #pragma HLS INTERFACE s_axilite port=input_length bundle=control
 42 |     #pragma HLS INTERFACE s_axilite port=return bundle=control
 43 | #endif
 44 |     
 45 |     // Shift registers holding the current window: index 0 = newest, last index = oldest
 46 |     data_t xcorr_sr[WINDOW_LENGTH];
 47 |     data_t threshold_sr[WINDOW_LENGTH];
 48 |     
 49 | #ifdef __SYNTHESIS__
 50 |     // Complete array partitioning for parallel access
 51 |     #pragma HLS ARRAY_PARTITION variable=xcorr_sr complete dim=1
 52 |     #pragma HLS ARRAY_PARTITION variable=threshold_sr complete dim=1
 53 | #endif
 54 |     
 55 |     // Initialize shift registers to zero
 56 |     init_sr: for (int i = 0; i < WINDOW_LENGTH; i++) {
 57 | #ifdef __SYNTHESIS__
 58 |         #pragma HLS UNROLL
 59 | #endif
 60 |         xcorr_sr[i] = 0;
 61 |         threshold_sr[i] = 0;
 62 |     }
 63 |     
 64 |     index_t peak_count = 0;
 65 |     // Main loop: one input sample per iteration, including the initial window fill.
 66 |     // NOTE(review): the MATLAB reference stops WINDOW_LENGTH-1 samples earlier; confirm the tail is wanted.
 67 |     ultra_main_loop: for (index_t idx = 0; idx < input_length; idx++) {
 68 | #ifdef __SYNTHESIS__
 69 |         #pragma HLS PIPELINE II=1
 70 |         #pragma HLS LOOP_TRIPCOUNT min=6001 max=6001 avg=6001
 71 |         // Deliberately no "HLS DEPENDENCE ... inter false" on the shift registers:
 72 |         // each iteration reads what the previous one wrote, so the dependence is
 73 |         // real and declaring it false could let the scheduler read stale values.
 74 | #endif
 75 |         
 76 |         // Read new samples
 77 |         data_t new_xcorr = xcorr[idx];
 78 |         data_t new_threshold = threshold[idx];
 79 |         
 80 |         // Shift from the oldest slot down so each value is copied before it is overwritten
 81 |         shift_sr: for (int k = WINDOW_LENGTH - 1; k > 0; k--) {
 82 | #ifdef __SYNTHESIS__
 83 |             #pragma HLS UNROLL
 84 | #endif
 85 |             xcorr_sr[k] = xcorr_sr[k - 1];
 86 |             threshold_sr[k] = threshold_sr[k - 1];
 87 |         }
 88 |         xcorr_sr[0] = new_xcorr;
 89 |         threshold_sr[0] = new_threshold;
 90 |         
 91 |         // Peak detection (starts after window is filled)
 92 |         if (idx >= WINDOW_LENGTH - 1) {
 93 |             data_t mid_xcorr = xcorr_sr[MIDDLE_LOCATION];
 94 |             data_t mid_threshold = threshold_sr[MIDDLE_LOCATION];
 95 |             
 96 |             bool above_threshold = (mid_xcorr > mid_threshold);
 97 |             
 98 |             // Local-maximum test: every sample except the centre must be <= the centre
 99 |             bool is_peak = true;
100 |             compare_sr: for (int k = 0; k < WINDOW_LENGTH; k++) {
101 | #ifdef __SYNTHESIS__
102 |                 #pragma HLS UNROLL
103 | #endif
104 |                 if (k != MIDDLE_LOCATION) {
105 |                     is_peak = is_peak && (xcorr_sr[k] <= mid_xcorr);
106 |                 }
107 |             }
108 |             
109 |             // Position of the centre sample in the input
110 |             index_t peak_location = idx - MIDDLE_LOCATION + 1; // MATLAB 1-indexed
111 |             
112 |             // Store the peak; once MAX_PEAKS entries are written, later peaks are dropped
113 |             if (is_peak && above_threshold && peak_count < MAX_PEAKS) {
114 |                 locations[peak_count] = peak_location;
115 |                 peak_count++;
116 |             }
117 |         }
118 |     }
119 |     
120 |     *num_peaks = peak_count;
121 | }
122 | 
123 | // Alias for backward compatibility: forwards every argument unchanged to peakPicker_wrapper.
124 | void peakPicker(
125 |     data_t xcorr[MAX_INPUT_SIZE],
126 |     data_t threshold[MAX_INPUT_SIZE], 
127 |     index_t input_length,
128 |     index_t locations[MAX_PEAKS],
129 |     index_t* num_peaks
130 | ) { // NOTE(review): the INTERFACE pragmas are declared in peakPicker_wrapper, not here - confirm which function is the synthesis top
131 |     peakPicker_wrapper(xcorr, threshold, input_length, locations, num_peaks);
132 | }


--------------------------------------------------------------------------------
/scripts/validate_report_parsing.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | import os
  4 | import sys
  5 | import argparse
  6 | import json
  7 | import logging
  8 | from pathlib import Path
  9 | 
 10 | # Configure logging for the script: INFO and above, timestamped, through a StreamHandler
 11 | logging.basicConfig(
 12 |     level=logging.INFO,
 13 |     format="%(asctime)s [%(levelname)s] %(message)s",
 14 |     handlers=[logging.StreamHandler()]
 15 | )
 16 | logger = logging.getLogger("report_validator")
 17 | 
 18 | # Put this script's own directory first on sys.path so agent_framework is importable from any cwd
 19 | script_dir = Path(__file__).parent
 20 | sys.path.insert(0, str(script_dir))
 21 | 
 22 | # Import agent framework components (must come after the sys.path change above)
 23 | from agent_framework import DocumentationAgent
 24 | 
 25 | def validate_report_parsing(component_dir):
 26 |     """Validate the parsing of HLS implementation and latency reports."""
 27 |     if not os.path.exists(component_dir):
 28 |         logger.error(f"Component directory not found: {component_dir}")
 29 |         return False
 30 |     
 31 |     # Create a temporary documentation agent for testing
 32 |     doc_agent = DocumentationAgent("test_doc_agent", "Test documentation agent")
 33 |     
 34 |     # Test report parsing
 35 |     try:
 36 |         logger.info(f"Analyzing reports in {component_dir}")
 37 |         metrics = doc_agent.analyze_reports(component_dir)
 38 |         
 39 |         if "error" in metrics:
 40 |             logger.error(f"Error in report analysis: {metrics['error']}")
 41 |             return False
 42 |         
 43 |         # Check if any metrics were found
 44 |         resource_count = sum(len(impl) for impl in metrics.get("resources", {}).values())
 45 |         timing_count = sum(len(impl) for impl in metrics.get("timing", {}).values())
 46 |         latency_count = len(metrics.get("latency", {}))
 47 |         
 48 |         logger.info(f"Found resource metrics: {resource_count} entries")
 49 |         logger.info(f"Found timing metrics: {timing_count} entries")
 50 |         logger.info(f"Found latency metrics: {latency_count} entries")
 51 |         
 52 |         # Check for specific expected fields
 53 |         if "resources" in metrics and metrics["resources"]:
 54 |             first_impl = next(iter(metrics["resources"]))
 55 |             first_res = metrics["resources"][first_impl]
 56 |             logger.info(f"Resource metrics example ({first_impl}): {first_res}")
 57 |         
 58 |         if "timing" in metrics and metrics["timing"]:
 59 |             first_impl = next(iter(metrics["timing"]))
 60 |             first_timing = metrics["timing"][first_impl]
 61 |             logger.info(f"Timing metrics example ({first_impl}): {first_timing}")
 62 |             
 63 |             # Verify MHz conversion
 64 |             for key, value in first_timing.items():
 65 |                 if key.endswith("_MHz"):
 66 |                     logger.info(f"  Found MHz conversion: {key} = {value}")
 67 |         
 68 |         if "latency" in metrics and metrics["latency"]:
 69 |             first_impl = next(iter(metrics["latency"]))
 70 |             first_latency = metrics["latency"][first_impl]
 71 |             logger.info(f"Latency metrics example ({first_impl}): {first_latency}")
 72 |         
 73 |         # Check if markdown report was generated
 74 |         md_report_path = os.path.join(component_dir, "performance_metrics.md")
 75 |         if os.path.exists(md_report_path):
 76 |             logger.info(f"Markdown performance report generated at: {md_report_path}")
 77 |             with open(md_report_path, 'r') as f:
 78 |                 report_content = f.read()
 79 |             logger.info(f"Report length: {len(report_content)} characters")
 80 |         else:
 81 |             logger.warning(f"No markdown report found at: {md_report_path}")
 82 |         
 83 |         # Verify data extraction for documentation
 84 |         test_prompt = doc_agent._create_documentation_prompt(
 85 |             {"workflow_summary": {}},
 86 |             metrics,
 87 |             component_dir,
 88 |             {"component": os.path.basename(component_dir)},
 89 |             ["readme"],
 90 |             None
 91 |         )
 92 |         
 93 |         logger.info(f"Generated documentation prompt length: {len(test_prompt)} characters")
 94 |         
 95 |         # Extract performance metrics section from the prompt
 96 |         import re
 97 |         perf_section = re.search(r"## Performance Metrics\s*\n(.*?)(?:\n##|\Z)", test_prompt, re.DOTALL)
 98 |         if perf_section:
 99 |             logger.info("Performance metrics section found in documentation prompt")
100 |             performance_text = perf_section.group(1)
101 |             logger.info(f"Performance section length: {len(performance_text)} characters")
102 |         else:
103 |             logger.warning("Performance metrics section not found in documentation prompt")
104 |         
105 |         return True
106 |         
107 |     except Exception as e:
108 |         logger.error(f"Error validating report parsing: {e}")
109 |         import traceback
110 |         logger.error(traceback.format_exc())
111 |         return False
112 | 
113 | def main():
114 |     parser = argparse.ArgumentParser(description="Validate HLS report parsing and documentation generation")
115 |     parser.add_argument("--component_dir", type=str, required=True, 
116 |                         help="Path to component directory containing HLS reports")
117 |     
118 |     args = parser.parse_args()
119 |     
120 |     success = validate_report_parsing(args.component_dir)
121 |     
122 |     if success:
123 |         logger.info("Validation completed successfully")
124 |         sys.exit(0)
125 |     else:
126 |         logger.error("Validation failed")
127 |         sys.exit(1)
128 | 
129 | if __name__ == "__main__":  # run the CLI only when executed as a script, not on import
130 |     main()
131 | 


--------------------------------------------------------------------------------
/implementations/peakPicker/peakPicker.cpp:
--------------------------------------------------------------------------------
  1 | /* AUTO-EDITED BY DEBUG ASSISTANT */
  2 | #include "peakPicker.hpp"
  3 | 
  4 | /**
  5 |  * @brief Streaming sliding-window peak detector (core of the peakPicker algorithm for HLS).
  6 |  *
  7 |  * Consumes numSamples (value, threshold) pairs. Once WINDOW_LENGTH samples are buffered, the
  8 |  * window's centre sample is reported when no other sample in the window is strictly greater
  9 |  * (ties are accepted) and the centre is strictly greater than its own threshold.
 10 |  * Writes the 0-based stream index of every detected peak to locationStream; nothing is
 11 |  * written for non-peaks, so the number of outputs is data-dependent.
 12 |  */
 13 | void peakPicker(
 14 |     hls::stream<Data_t>& xcorrStream,      // in:  samples, one read per loop iteration
 15 |     hls::stream<Data_t>& thresholdStream,  // in:  thresholds, read in lockstep with xcorrStream
 16 |     hls::stream<Index_t>& locationStream,  // out: 0-based indices of detected peaks
 17 |     int numSamples                         // in:  number of pairs to read from the input streams
 18 | ) {
 19 |     // Interface pragmas are usually placed here or in a separate directives file.
 20 |     // NOTE: the pragmas below are commented out, so none of them is active in this file.
 21 |     // #pragma HLS INTERFACE axis port=thresholdStream
 22 |     // #pragma HLS INTERFACE axis port=locationStream
 23 |     // #pragma HLS INTERFACE s_axilite port=numSamples bundle=control
 24 |     // #pragma HLS INTERFACE s_axilite port=return bundle=control
 25 | 
 26 |     // Buffers to hold the current window of data and thresholds.
 27 |     // They are used as shift registers: index 0 is the newest sample.
 28 |     Data_t xcorrBuffer[WINDOW_LENGTH];
 29 |     Data_t thresholdBuffer[WINDOW_LENGTH];
 30 | 
 31 |     // Partitioning the arrays allows parallel access to elements within the
 32 |     // pipelined loop, mapping them to registers for II=1.
 33 |     #pragma HLS ARRAY_PARTITION variable=xcorrBuffer complete dim=1
 34 |     #pragma HLS ARRAY_PARTITION variable=thresholdBuffer complete dim=1
 35 | 
 36 |     // Zero the buffers. No peak test runs until WINDOW_LENGTH real samples have
 37 |     // been shifted in, so these initial zeros never take part in a comparison.
 38 |     // Using an unrolled loop for initialization
 39 |     init_loop:
 40 |     for (int i = 0; i < WINDOW_LENGTH; ++i) {
 41 |         #pragma HLS UNROLL
 42 |         xcorrBuffer[i] = 0;
 43 |         thresholdBuffer[i] = 0;
 44 |     }
 45 | 
 46 |     // Main processing loop iterates through all input samples
 47 |     // Apply PIPELINE directive for high throughput (initiation interval II=1)
 48 |     main_loop:
 49 |     for (int i = 0; i < numSamples; ++i) {
 50 |         #pragma HLS PIPELINE II=1
 51 | 
 52 |         // 1. Shift Buffers: Make space for the new sample at index 0
 53 |         // Shift from the oldest slot down so each value is copied before it is overwritten
 54 |         shift_loop:
 55 |         for (int k = WINDOW_LENGTH - 1; k > 0; --k) {
 56 |             #pragma HLS UNROLL // Unroll this small loop for efficiency
 57 |             xcorrBuffer[k] = xcorrBuffer[k - 1];
 58 |             thresholdBuffer[k] = thresholdBuffer[k - 1];
 59 |         }
 60 | 
 61 |         // 2. Read New Samples: Read from input streams and place at the start (index 0)
 62 |         // No empty() check is made here; the caller must supply numSamples elements on each input stream
 63 |         Data_t newXcorrSample = xcorrStream.read();
 64 |         Data_t newThresholdSample = thresholdStream.read();
 65 |         xcorrBuffer[0] = newXcorrSample;
 66 |         thresholdBuffer[0] = newThresholdSample;
 67 | 
 68 |         // 3. Peak Detection Logic: Start after the window is filled
 69 |         // The first potential peak can be checked when i = WINDOW_LENGTH - 1
 70 |         // At this point, the sample corresponding to the middle of the *first full window*
 71 |         // is located at xcorrBuffer[MIDDLE_LOCATION]. In general, the sample at that
 72 |         // position was read at iteration i - MIDDLE_LOCATION.
 73 |         if (i >= WINDOW_LENGTH - 1) {
 74 |             // Get the sample and threshold at the center of the current window
 75 |             // This sample corresponds to the one read 'MIDDLE_LOCATION' iterations ago.
 76 |             Data_t middleSample = xcorrBuffer[MIDDLE_LOCATION];
 77 |             Data_t middleThreshold = thresholdBuffer[MIDDLE_LOCATION];
 78 | 
 79 |             // Check if the middle sample is the maximum in the window
 80 |             bool isPeak = true; // Assume it's a peak initially
 81 |             compare_loop:
 82 |             for (int k = 0; k < WINDOW_LENGTH; ++k) {
 83 |                  #pragma HLS UNROLL // Unroll comparison loop for parallelism
 84 |                 // Only a sample that is strictly greater than the middle sample
 85 |                 // disqualifies it, so ties are accepted: middleSample must be >=
 86 |                 // every other element in the window.
 87 |                 // Consequence: on a plateau of equal maxima, each sample of the
 88 |                 // plateau can be reported as a peak.
 89 |                 // The middle position itself is skipped by the k != MIDDLE_LOCATION test.
 90 |                 if (k != MIDDLE_LOCATION && xcorrBuffer[k] > middleSample) {
 91 |                     isPeak = false;
 92 |                     // Once one larger sample is found the result cannot change, so
 93 |                     // the remaining comparisons are skipped.
 94 |                     // NOTE(review): the effect of 'break' on the unrolled hardware is unverified.
 95 |                     break;
 96 |                 }
 97 |             }
 98 | 
 99 |             // Check if the peak condition is met (local maximum AND above threshold)
100 |             if (isPeak && (middleSample > middleThreshold)) {
101 |                 // The sample currently at xcorrBuffer[MIDDLE_LOCATION] was read
102 |                 // MIDDLE_LOCATION iterations ago, so its 0-based input index is i - MIDDLE_LOCATION.
103 |                 // NOTE(review): the MATLAB reference reports 1-based positions (this value + 1) - confirm consumers.
104 |                 Index_t peakLocation = i - MIDDLE_LOCATION;
105 | 
106 |                 // Write the detected peak location (0-based index) to the output stream
107 |                 locationStream.write(peakLocation);
108 |             }
109 |         }
110 |     } // end main_loop
111 | }


--------------------------------------------------------------------------------
/example/peakPicker_optimized.cpp:
--------------------------------------------------------------------------------
  1 | #include "peakPicker.hpp"
  2 | 
  3 | #ifdef __SYNTHESIS__
  4 | #include <hls_stream.h>
  5 | #endif
  6 | 
  7 | // Streaming sliding-window peak picker.
  8 | // Reads input_length sample pairs from xcorr_stream / threshold_stream and writes to
  9 | // locations_stream the 1-based index of each sample that is strictly greater than every
 10 | // other sample of its WINDOW_LENGTH window and strictly greater than its own threshold.
 11 | // At most MAX_PEAKS locations are written; the number written is stored in *num_peaks.
 12 | void peakPicker(
 13 |     hls::stream<data_t>& xcorr_stream,
 14 |     hls::stream<data_t>& threshold_stream,
 15 |     index_t input_length,
 16 |     hls::stream<index_t>& locations_stream,
 17 |     index_t* num_peaks
 18 | ) {
 19 | #ifdef __SYNTHESIS__
 20 |     // Interface pragmas: AXI4-Stream data ports, AXI4-Lite control, AXI master for num_peaks
 21 |     #pragma HLS INTERFACE axis port=xcorr_stream
 22 |     #pragma HLS INTERFACE axis port=threshold_stream
 23 |     #pragma HLS INTERFACE axis port=locations_stream
 24 |     #pragma HLS INTERFACE s_axilite port=input_length bundle=control
 25 |     #pragma HLS INTERFACE m_axi port=num_peaks offset=slave bundle=gmem
 26 |     #pragma HLS INTERFACE s_axilite port=return bundle=control
 27 | 
 28 |     // Dataflow optimization for task-level pipelining
 29 |     #pragma HLS DATAFLOW
 30 | #endif
 31 | 
 32 |     // Sliding windows kept as shift registers: index 0 holds the newest sample.
 33 |     data_t xcorr_window[WINDOW_LENGTH];
 34 |     data_t threshold_window[WINDOW_LENGTH];
 35 | #ifdef __SYNTHESIS__
 36 |     #pragma HLS ARRAY_PARTITION variable=xcorr_window complete dim=1
 37 |     #pragma HLS ARRAY_PARTITION variable=threshold_window complete dim=1
 38 | #endif
 39 | 
 40 |     init_window: for (int i = 0; i < WINDOW_LENGTH; i++) {
 41 | #ifdef __SYNTHESIS__
 42 |         #pragma HLS UNROLL
 43 | #endif
 44 |         xcorr_window[i] = 0;
 45 |         threshold_window[i] = 0;
 46 |     }
 47 | 
 48 |     index_t peak_count = 0;
 49 | 
 50 |     // Main loop: consume one sample pair per iteration.
 51 |     main_processing: for (index_t idx = 0; idx < input_length; idx++) {
 52 | #ifdef __SYNTHESIS__
 53 |         #pragma HLS PIPELINE II=1
 54 |         #pragma HLS LOOP_TRIPCOUNT min=6001 max=6001 avg=6001
 55 | #endif
 56 |         data_t xcorr_sample = xcorr_stream.read();
 57 |         data_t threshold_sample = threshold_stream.read();
 58 | 
 59 |         // Shift both windows by one position and insert the new samples at index 0.
 60 |         shift_registers: for (int i = WINDOW_LENGTH - 1; i > 0; i--) {
 61 | #ifdef __SYNTHESIS__
 62 |             // Loop pragmas must sit inside the loop body; placed before the loop they
 63 |             // would bind to the enclosing main_processing loop instead.
 64 |             #pragma HLS UNROLL
 65 | #endif
 66 |             xcorr_window[i] = xcorr_window[i-1];
 67 |             threshold_window[i] = threshold_window[i-1];
 68 |         }
 69 |         xcorr_window[0] = xcorr_sample;
 70 |         threshold_window[0] = threshold_sample;
 71 | 
 72 |         // Evaluate only once WINDOW_LENGTH samples have been shifted in.
 73 |         if (idx >= WINDOW_LENGTH - 1) {
 74 |             data_t mid_xcorr = xcorr_window[MIDDLE_LOCATION];
 75 |             data_t mid_threshold = threshold_window[MIDDLE_LOCATION];
 76 |             bool above_threshold = (mid_xcorr > mid_threshold);
 77 | 
 78 |             // Any other window sample >= the middle one disqualifies it (ties are rejected).
 79 |             bool is_local_max = true;
 80 |             peak_comparison: for (int i = 0; i < WINDOW_LENGTH; i++) {
 81 | #ifdef __SYNTHESIS__
 82 |                 #pragma HLS UNROLL
 83 | #endif
 84 |                 if (i != MIDDLE_LOCATION && xcorr_window[i] >= mid_xcorr) {
 85 |                     is_local_max = false;
 86 |                 }
 87 |             }
 88 | 
 89 |             if (is_local_max && above_threshold && peak_count < MAX_PEAKS) {
 90 |                 // The middle sample was read MIDDLE_LOCATION iterations ago; +1 converts
 91 |                 // its 0-based stream index to MATLAB's 1-based convention.
 92 |                 index_t peak_location = idx - MIDDLE_LOCATION + 1;
 93 |                 locations_stream.write(peak_location);
 94 |                 peak_count++;
 95 |             }
 96 |         }
 97 |     }
 98 | 
 99 |     *num_peaks = peak_count;
100 | }
101 | 
102 | // Array-interface adapter around the streaming peakPicker core.
103 | // Copies the first input_length entries of xcorr/threshold into streams, runs the core,
104 | // then copies the reported peak locations into locations[] and the peak count into
105 | // *num_peaks.
106 | void peakPicker_wrapper(
107 |     data_t xcorr[MAX_INPUT_SIZE],
108 |     data_t threshold[MAX_INPUT_SIZE], 
109 |     index_t input_length,
110 |     index_t locations[MAX_PEAKS],
111 |     index_t* num_peaks
112 | ) {
113 | #ifdef __SYNTHESIS__
114 |     // One AXI master bundle per array/pointer argument; scalar and block control on AXI4-Lite
115 |     #pragma HLS INTERFACE m_axi port=xcorr offset=slave bundle=gmem0
116 |     #pragma HLS INTERFACE m_axi port=threshold offset=slave bundle=gmem1
117 |     #pragma HLS INTERFACE m_axi port=locations offset=slave bundle=gmem2
118 |     #pragma HLS INTERFACE m_axi port=num_peaks offset=slave bundle=gmem3
119 |     #pragma HLS INTERFACE s_axilite port=input_length bundle=control
120 |     #pragma HLS INTERFACE s_axilite port=return bundle=control
121 |     
122 |     #pragma HLS DATAFLOW
123 | #endif
124 |     
125 |     // Channels between the feeder loop, the core and the collector loop
126 |     static hls::stream<data_t> xcorr_stream("xcorr_stream");
127 |     static hls::stream<data_t> threshold_stream("threshold_stream");
128 |     static hls::stream<index_t> locations_stream("locations_stream");
129 |     
130 | #ifdef __SYNTHESIS__
131 |     // NOTE(review): locations_stream depth is fixed at 100 - confirm it covers MAX_PEAKS
132 |     #pragma HLS STREAM variable=xcorr_stream depth=2
133 |     #pragma HLS STREAM variable=threshold_stream depth=2
134 |     #pragma HLS STREAM variable=locations_stream depth=100
135 | #endif
136 |     
137 |     // Stage 1: push every input sample pair into the core's input streams
138 |     input_feeder: for (index_t n = 0; n < input_length; ++n) {
139 | #ifdef __SYNTHESIS__
140 |         #pragma HLS PIPELINE II=1
141 | #endif
142 |         xcorr_stream.write(xcorr[n]);
143 |         threshold_stream.write(threshold[n]);
144 |     }
145 |     
146 |     // Stage 2: run the streaming core; it reports how many peaks it emitted
147 |     index_t core_peak_count;
148 |     peakPicker(xcorr_stream, threshold_stream, input_length, locations_stream, &core_peak_count);
149 |     
150 |     // Stage 3: drain the reported number of locations, never more than MAX_PEAKS
151 |     output_collector: for (index_t k = 0; k < core_peak_count && k < MAX_PEAKS; ++k) {
152 | #ifdef __SYNTHESIS__
153 |         #pragma HLS PIPELINE II=1
154 | #endif
155 |         locations[k] = locations_stream.read();
156 |     }
157 |     
158 |     *num_peaks = core_peak_count;
159 | }


--------------------------------------------------------------------------------
/prompts/documentation_template.md:
--------------------------------------------------------------------------------
  1 | # General Documentation Template
  2 | 
  3 | ## Context
  4 | You are tasked with creating comprehensive documentation for an FPGA hardware accelerator design. This documentation should be appropriate for technical users who need to understand, use, or modify this hardware component.
  5 | 
  6 | ## Component Information
  7 | - **Component Name**: {component_name}
  8 | - **Design Purpose**: [Extract from context]
  9 | - **Generation Method**: AI-assisted design using LLM
 10 | - **Target Platform**: Xilinx FPGA
 11 | 
 12 | ## Documentation Structure
 13 | 
 14 | Create thorough documentation following this structure:
 15 | 
 16 | ### 1. Overview
 17 | - Component purpose and functionality
 18 | - Key features
 19 | - Target applications
 20 | - Design approach
 21 | 
 22 | ### 2. Architecture
 23 | - Block diagram description
 24 | - Interface specification
 25 | - Data flow 
 26 | - Key components
 27 | 
 28 | **Architecture Visualization**: Include a Mermaid diagram showing the component architecture. Example:
 29 | 
 30 | ```mermaid
 31 | flowchart TD
 32 |     A["Input Interface"] --> B["Core Processing"]
 33 |     B --> C["Output Interface"]
 34 |     D["Control Logic"] --> B
 35 |     E["Memory"] <--> B
 36 | ```
 37 | 
 38 | ### 3. Implementation Details
 39 | - Algorithm description
 40 | - HLS optimizations
 41 | - Resource utilization
 42 | - Performance characteristics
 43 | 
 44 | **Algorithm Visualization**: Include appropriate diagrams showing the algorithm implementation:
 45 | 
 46 | ```mermaid
 47 | flowchart LR
 48 |     A["Data Input"] --> B["Stage 1"]
 49 |     B --> C["Stage 2"]
 50 |     C --> D["Stage 3"]
 51 |     D --> E["Data Output"]
 52 |     
 53 |     F["Configuration"] --> B
 54 |     F --> C
 55 |     F --> D
 56 | ```
 57 | 
 58 | ### 4. Usage Guide
 59 | - Integration instructions
 60 | - API/interface description
 61 | - Example usage
 62 | - Configuration options
 63 | 
 64 | **Integration Visualization**: Show integration flow with other components:
 65 | 
 66 | ```mermaid
 67 | flowchart LR
 68 |     A["Host System"] --> B["Driver"]
 69 |     B --> C["{component_name}"]
 70 |     C --> D["Memory"]
 71 |     C --> E["Other IP Cores"]
 72 | ```
 73 | 
 74 | ### 5. Performance Analysis
 75 | - Latency and throughput
 76 | - Resource efficiency
 77 | - Comparative metrics
 78 | - Limitations
 79 | 
 80 | **Performance Visualization**: Include resource utilization tables and charts:
 81 | 
 82 | | Resource | Utilization | Available | Utilization % |
 83 | |----------|-------------|-----------|---------------|
 84 | | LUT      | X           | X_total   | X_percent     |
 85 | | FF       | Y           | Y_total   | Y_percent     |
 86 | | DSP      | Z           | Z_total   | Z_percent     |
 87 | | BRAM     | W           | W_total   | W_percent     |
 88 | 
 89 | 
 90 | Include performance metrics (Timing & Latency) in well-formatted tables:
 91 | 
 92 | #### Timing
 93 | 
 94 | | Implementation | Target (ns) | Target (MHz) | Post-Synthesis (ns) | Post-Synthesis (MHz) | Post-Route (ns) | Post-Route (MHz) |
 95 | |---------------|------------|-------------|-------------------|---------------------|----------------|----------------|
 96 | | solution1 | 3.90 | 256.00 | u.uu | vvv.vv | x.xx | yyy.yy |
 97 | 
 98 | #### Latency
 99 | 
100 | | Implementation | Min (cycles) | Max (cycles) | Average (cycles) | Throughput (samples/cycle) |
101 | |---------------|-------------|-------------|-----------------|-----------------------------|
102 | | solution1 | X | Y | - | - |
103 | 
104 | 
105 | 
106 | ### 6. Verification
107 | - Test methodology
108 | - Simulation results
109 | - Validation approach
110 | - Known issues
111 | 
112 | **Verification Visualization**: Use sequence diagrams to show test procedures:
113 | 
114 | ```mermaid
115 | sequenceDiagram
116 |     participant TB as Testbench
117 |     participant DUT as Design Under Test
118 |     
119 |     TB->>DUT: Initialize
120 |     TB->>DUT: Apply Test Vector 1
121 |     DUT-->>TB: Output Results 1
122 |     TB->>TB: Verify Results 1
123 |     TB->>DUT: Apply Test Vector 2
124 |     DUT-->>TB: Output Results 2
125 |     TB->>TB: Verify Results 2
126 | ```
127 | 
128 | ### 7. Development Process
129 | - Design evolution
130 | - Challenges and solutions
131 | - AI assistance insights
132 | - Optimization history
133 | 
134 | **Development Visualization**: Use a state or flowchart diagram to show the design evolution:
135 | 
136 | ```mermaid
137 | stateDiagram-v2
138 |     [*] --> Initial_Design
139 |     Initial_Design --> Functional_Implementation
140 |     Functional_Implementation --> Performance_Optimization
141 |     Performance_Optimization --> Resource_Optimization
142 |     Resource_Optimization --> Final_Design
143 |     Final_Design --> [*]
144 | ```
145 | 
146 | ## Source Information
147 | Use the following source material to inform your documentation:
148 | 
149 | ### Source Code
150 | ```cpp
151 | // Header file
152 | {header_code}
153 | ```
154 | 
155 | ```cpp
156 | // Implementation file
157 | {implementation_code}
158 | ```
159 | 
160 | ```cpp
161 | // Testbench file
162 | {testbench_code}
163 | ```
164 | 
165 | ### Performance Metrics
166 | {performance_metrics}
167 | 
168 | ### Implementation Challenges
169 | {errors_encountered}
170 | 
171 | ### Debugging Methods
172 | {debugging_methods}
173 | 
174 | ## Diagram Examples
175 | The following are examples of different types of Mermaid diagrams you can use:
176 | 
177 | {diagram_examples}
178 | 
179 | ## Chart Examples
180 | The following are examples of different types of tables/charts for performance data:
181 | 
182 | {chart_examples}
183 | 
184 | ## Style Guidelines
185 | - Use clear, professional language
186 | - Include code snippets and examples where helpful
187 | - Use proper technical terminology
188 | - Be concise but thorough
189 | - Use appropriate markdown formatting
190 | - Focus on practical usage information
191 | - Make effective use of diagrams and visualizations
192 | - Use Mermaid diagrams for architecture, flows, and algorithms
193 | - Present performance metrics in well-formatted tables
194 | - Ensure diagrams have clear labels and descriptions
195 | 
196 | Your documentation should serve as a comprehensive reference for this hardware component, with visual elements that enhance understanding.
197 | 


--------------------------------------------------------------------------------
/CLAUDE.md:
--------------------------------------------------------------------------------
  1 | # CLAUDE.md
  2 | 
  3 | This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
  4 | 
  5 | ## Project Overview
  6 | 
  7 | This repository implements an LLM-aided FPGA design flow that converts MATLAB algorithms to optimized HLS C++ implementations. The primary focus is on automating the design workflow using Large Language Models (Claude, GPT-4, Gemini) for 5G NR signal processing components, specifically peak picking algorithms for SSB detection.
  8 | 
  9 | ## Key Architecture Components
 10 | 
 11 | ### 1. Multi-Stage LLM Pipeline
 12 | - **Code Generation**: MATLAB → HLS C++ conversion using structured prompts
 13 | - **Automated Debugging**: AI-powered error analysis and code correction
 14 | - **Agent Framework**: Orchestrates multiple LLM services with fallback mechanisms
 15 | 
 16 | ### 2. Core Scripts (scripts/)
 17 | - `generate_hls_code.py`: Main code generation from MATLAB to HLS C++
 18 | - `debug_assistant.py`: LLM-based debugging of C simulation errors  
 19 | - `agent_framework.py`: Agent orchestration and prompt management
 20 | - Support for Gemini (primary), OpenAI, and Claude APIs with automatic fallback
 21 | 
 22 | ### 3. Prompt Engineering System (prompts/)
 23 | - Structured templates for different tasks (hls_conversion.md, hls_debugging.md, etc.)
 24 | - Domain-specific prompts for 5G signal processing
 25 | - Performance optimization and documentation generation templates
 26 | 
 27 | ## Common Development Commands
 28 | 
 29 | ### HLS Project Development (in implementations/peakPicker/)
 30 | ```bash
 31 | # Full development cycle
 32 | make all                    # Clean, build testbench, run C sim and synthesis
 33 | 
 34 | # Individual stages  
 35 | make tb                     # Build and run standalone C++ testbench
 36 | make csim                   # Run HLS C simulation
 37 | make csynth                 # Run HLS C synthesis
 38 | make cosim                  # Run C/RTL co-simulation
 39 | make export_ip             # Export as IP catalog
 40 | make impl                  # Run Vivado implementation
 41 | make clean                 # Clean generated files
 42 | make help                  # Show all available targets
 43 | ```
 44 | 
 45 | ### Code Generation Workflow
 46 | ```bash
 47 | # Generate HLS from MATLAB (from repository root)
 48 | python3 scripts/generate_hls_code.py \
 49 |   --matlab_file algorithms/peakPicker.m algorithms/peakPicker_tb.m \
 50 |   --prompt prompts/hls_conversion.md \
 51 |   --model gemini-2.0-flash-thinking-exp
 52 | 
 53 | # Debug C simulation errors
 54 | python3 scripts/debug_assistant.py \
 55 |   --error_log implementations/peakPicker/proj_peakPicker/solution1/csim/report/peakPicker_csim.log \
 56 |   --source_file implementations/peakPicker/peakPicker.cpp implementations/peakPicker/peakPicker.hpp implementations/peakPicker/peakPicker_tb.cpp
 57 | ```
 58 | 
 59 | ### Environment Setup
 60 | ```bash
 61 | # Required: Set Vitis HLS path
 62 | export VITIS_HLS_PATH=/opt/Xilinx/Vitis_HLS/2023.2
 63 | source $VITIS_HLS_PATH/settings64.sh
 64 | 
 65 | # Required: Set at least one API key
 66 | export GEMINI_API_KEY=your_key_here
 67 | # Optional alternatives:
 68 | export OPENAI_API_KEY=your_key_here  
 69 | export CLAUDE_API_KEY=your_key_here
 70 | 
 71 | # Install Python dependencies
 72 | pip install -r requirements.txt
 73 | ```
 74 | 
 75 | ## Hardware Configuration
 76 | 
 77 | - **Target FPGA**: xc7k410t-ffg900-2 (Kintex-7)
 78 | - **Clock Frequency**: 256MHz (3.9ns period)
 79 | - **Clock Uncertainty**: 12.5%
 80 | - **HLS Version**: Vitis HLS 2023.2
 81 | 
 82 | ## LLM Model Selection
 83 | 
 84 | ### Supported Models and Use Cases
 85 | - **gemini-2.0-flash-thinking-exp**: Fast iterations, general debugging
 86 | - **gemini-2.0-pro-exp**: Complex algorithm conversion (default)
 87 | - **gpt-4**: Detailed implementations requiring careful analysis
 88 | - **gpt-3.5-turbo**: Quick prototyping and simple conversions
 89 | - **claude-sonnet**: Algorithm explanations and documentation
 90 | 
 91 | ### API Fallback Order
 92 | 1. Gemini (primary) - good code reasoning and HLS optimization
 93 | 2. OpenAI - comprehensive code generation  
 94 | 3. Claude - detailed algorithmic understanding
 95 | 
 96 | ## File Organization Patterns
 97 | 
 98 | ### Implementation Structure
 99 | ```
100 | implementations/
101 | ├── peakPicker/                    # Component-specific directory
102 | │   ├── Makefile                   # HLS build automation
103 | │   ├── peakPicker.cpp/.hpp        # Generated HLS implementation
104 | │   ├── peakPicker_tb.cpp          # Generated testbench
105 | │   └── documentation/             # Auto-generated docs
106 | ```
107 | 
108 | ### Data and Testing
109 | - `data/`: Test vectors and reference data (automatically included in HLS builds)
110 | - `algorithms/`: MATLAB reference implementations
111 | - Test data files are automatically detected and added to HLS projects
112 | 
113 | ## Key Implementation Notes
114 | 
115 | ### HLS-Specific Considerations
116 | - All implementations target ap_int<> and ap_fixed<> data types for optimal resource usage
117 | - Testbenches automatically load data from `../../data/` directory
118 | - HLS pragmas are used for performance optimization (PIPELINE, UNROLL, ARRAY_PARTITION)
119 | - Interface synthesis uses ap_ctrl_hs with AXI4-Stream for data
120 | 
121 | ### Code Generation Process
122 | 1. Parse MATLAB reference algorithms
123 | 2. Apply domain-specific prompts (5G signal processing context)
124 | 3. Generate three files: header (.hpp), implementation (.cpp), testbench (_tb.cpp)
125 | 4. Automatic verification through C simulation
126 | 5. AI-powered debugging if errors occur
127 | 
128 | ### Error Handling and Debugging
129 | - Automatic error log parsing and analysis
130 | - LLM generates detailed debug reports with specific fixes
131 | - Support for interface mismatches, data type issues, and algorithmic errors
132 | - Debug reports saved to `debug_reports/` with timestamps
133 | 
134 | ## Working with Prompts
135 | 
136 | ### Prompt Template Structure
137 | - **Context Section**: Algorithm purpose and background
138 | - **Task Description**: Clear implementation requirements  
139 | - **Technical Requirements**: HLS-specific coding standards
140 | - **Deliverables**: Expected output files and format
141 | 
142 | ### Prompt Backup System
143 | - All prompts are automatically backed up with timestamps in `prompts/backups/`
144 | - Version control for prompt evolution and A/B testing
145 | - Performance metrics tracking for prompt effectiveness


--------------------------------------------------------------------------------
/example/peakPicker_tb.cpp:
--------------------------------------------------------------------------------
  1 | #include "peakPicker.hpp"
  2 | #include <iostream>
  3 | #include <fstream>
  4 | #include <vector>
  5 | #include <iomanip>
  6 | #include <cmath>
  7 | 
  8 | using namespace std;
  9 | 
 10 | // Loads whitespace-separated numeric values from a text file.
 11 | // Returns an empty vector (after an error on stderr) when the file cannot be opened;
 12 | // reading stops at end of file or at the first token that does not parse as a double.
 13 | vector<double> readDataFromFile(const string& filename) {
 14 |     ifstream input(filename);
 15 |     vector<double> samples;
 16 | 
 17 |     if (input.is_open()) {
 18 |         for (double sample; input >> sample; ) {
 19 |             samples.push_back(sample);
 20 |         }
 21 |         input.close();
 22 |         cout << "Read " << samples.size() << " values from " << filename << endl;
 23 |     } else {
 24 |         cerr << "Error: Could not open file " << filename << endl;
 25 |     }
 26 | 
 27 |     return samples;
 28 | }
 29 | 
 30 | // Loads the expected peak locations (whitespace-separated integers) from a text file.
 31 | // An unreadable file is reported on stderr and yields an empty vector; parsing ends
 32 | // at end of file or at the first token that is not an integer.
 33 | vector<int> readReferenceLocations(const string& filename) {
 34 |     vector<int> expected;
 35 |     ifstream input(filename);
 36 |     if (!input.is_open()) {
 37 |         cerr << "Error: Could not open file " << filename << endl;
 38 |         return expected;
 39 |     }
 40 | 
 41 |     int location = 0;
 42 |     while (input >> location) {
 43 |         expected.push_back(location);
 44 |     }
 45 |     input.close();
 46 | 
 47 |     cout << "Read " << expected.size() << " reference locations from " << filename << endl;
 48 |     return expected;
 49 | }
 49 | 
 50 | // Writes the detected locations to a text file as one tab-separated line.
 51 | // The line is newline-terminated even when the list is empty; if the file cannot be
 52 | // created an error is printed to stderr and nothing is written.
 53 | void writeResultsToFile(const string& filename, const vector<int>& locations) {
 54 |     ofstream out(filename);
 55 |     if (!out.is_open()) {
 56 |         cerr << "Error: Could not create file " << filename << endl;
 57 |         return;
 58 |     }
 59 | 
 60 |     // Emit the separator before every element except the first.
 61 |     for (size_t idx = 0; idx < locations.size(); ++idx) {
 62 |         if (idx != 0) out << "\t";
 63 |         out << locations[idx];
 64 |     }
 65 |     out << endl;
 66 |     out.close();
 67 | 
 68 |     cout << "Written " << locations.size() << " locations to " << filename << endl;
 69 | }
 70 | 
 71 | int main() {
 72 |     // Testbench entry point: loads the stimulus and reference files, runs peakPicker on
 73 |     // the converted samples, writes the detected locations to peakLocs_out.txt and
 74 |     // compares them with the reference. Returns 0 on match, 1 on mismatch, -1 when the
 75 |     // input files are missing, empty or of different lengths.
 76 |     cout << "=== Peak Picker HLS Testbench ===" << endl;
 77 | 
 78 |     vector<double> xcorr_data = readDataFromFile("pssCorrMagSq_3_in.txt");
 79 |     vector<double> threshold_data = readDataFromFile("threshold_in.txt");
 80 |     vector<int> ref_locations = readReferenceLocations("locations_3_ref.txt");
 81 | 
 82 |     if (xcorr_data.empty() || threshold_data.empty()) {
 83 |         cerr << "Error: Failed to read input data files" << endl;
 84 |         return -1;
 85 |     }
 86 |     if (xcorr_data.size() != threshold_data.size()) {
 87 |         cerr << "Error: Input data size mismatch" << endl;
 88 |         return -1;
 89 |     }
 90 |     cout << "Input data size: " << xcorr_data.size() << " samples" << endl;
 91 | 
 92 |     // Buffers handed to the design under test; static storage keeps them off the stack.
 93 |     static data_t xcorr[MAX_INPUT_SIZE];
 94 |     static data_t threshold[MAX_INPUT_SIZE];
 95 |     static index_t locations[MAX_PEAKS];
 96 |     index_t num_peaks = 0;
 97 | 
 98 |     // Inputs longer than MAX_INPUT_SIZE are truncated to the buffer size.
 99 |     index_t input_length = min((size_t)MAX_INPUT_SIZE, xcorr_data.size());
100 |     for (index_t n = 0; n < input_length; n++) {
101 |         xcorr[n] = (data_t)xcorr_data[n];
102 |         threshold[n] = (data_t)threshold_data[n];
103 |     }
104 |     for (int n = 0; n < MAX_PEAKS; n++) {
105 |         locations[n] = 0;
106 |     }
107 | 
108 |     cout << "Calling peakPicker function..." << endl;
109 |     peakPicker(xcorr, threshold, input_length, locations, &num_peaks);
110 |     cout << "Peak detection completed. Found " << num_peaks << " peaks." << endl;
111 | 
112 |     // Copy the reported peaks into a vector of plain ints for file output and comparison.
113 |     vector<int> detected_locations;
114 |     for (index_t n = 0; n < num_peaks; n++) {
115 |         detected_locations.push_back((int)locations[n]);
116 |     }
117 |     writeResultsToFile("peakLocs_out.txt", detected_locations);
118 | 
119 |     const size_t detected_count = detected_locations.size();
120 |     const size_t reference_count = ref_locations.size();
121 |     const bool same_count = (detected_count == reference_count);
122 | 
123 |     cout << "\n=== Results Comparison ===" << endl;
124 |     cout << "Detected peaks: " << detected_count << endl;
125 |     cout << "Reference peaks: " << reference_count << endl;
126 |     if (!same_count) {
127 |         cout << "WARNING: Different number of peaks detected!" << endl;
128 |     }
129 | 
130 |     // Prints a label followed by the comma-separated values and a newline.
131 |     auto print_list = [](const char* label, const vector<int>& values) {
132 |         cout << label;
133 |         for (size_t n = 0; n < values.size(); n++) {
134 |             if (n != 0) cout << ", ";
135 |             cout << values[n];
136 |         }
137 |         cout << endl;
138 |     };
139 |     print_list("\nDetected peak locations: ", detected_locations);
140 |     print_list("Reference peak locations: ", ref_locations);
141 | 
142 |     // Vector equality checks both the length and every element.
143 |     const bool results_match = (detected_locations == ref_locations);
144 | 
145 |     cout << "\n=== Test Result ===" << endl;
146 |     if (results_match) {
147 |         cout << "✓ TEST PASSED: Output matches reference" << endl;
148 |         return 0;
149 |     }
150 |     cout << "✗ TEST FAILED: Output does not match reference" << endl;
151 | 
152 |     // Per-peak error statistics are only meaningful when both lists pair up one-to-one.
153 |     if (same_count && detected_count != 0) {
154 |         double total_abs_error = 0;
155 |         double max_abs_error = 0;
156 |         for (size_t n = 0; n < detected_count; n++) {
157 |             double abs_error = abs(detected_locations[n] - ref_locations[n]);
158 |             total_abs_error += abs_error;
159 |             max_abs_error = max(max_abs_error, abs_error);
160 |         }
161 |         double mean_abs_error = total_abs_error / detected_count;
162 | 
163 |         cout << "Error Analysis:" << endl;
164 |         cout << "  Mean absolute error: " << mean_abs_error << endl;
165 |         cout << "  Maximum absolute error: " << max_abs_error << endl;
166 |     }
167 |     return 1;
168 | }


--------------------------------------------------------------------------------
/example/Makefile:
--------------------------------------------------------------------------------
  1 | # Makefile for HLS Project
  2 | 
  3 | # Set the design name
  4 | DESIGN_NAME = peakPicker
  5 | 
  6 | # Configuration variables
  7 | CSIM = 1
  8 | CSYNTH = 1
  9 | COSIM = 1
 10 | EXPORT_IP = 1
 11 | VIVADO_IMPL = 1
 12 | 
 13 | # Hardware configuration
 14 | CLOCK_FREQ = 256
 15 | FPGA_PART = xc7k410t-ffg900-2
 16 | CLOCK_UNCERTAINTY = 12.5
 17 | 
 18 | # Vitis HLS installation path - modify this to match your installation
 19 | VITIS_HLS_PATH ?= /opt/Xilinx/Vitis_HLS/2024.2
 20 | # HLS compiler and flags - use full path to vitis_hls executable
 21 | HLS = $(VITIS_HLS_PATH)/bin/vitis_hls
 22 | 
 23 | # You can also set VITIS_HLS_PATH via environment variable before running make:
 24 | # export VITIS_HLS_PATH=/path/to/your/Vitis_HLS/installation
 25 | 
 26 | # Optional: Uncomment to source Vitis HLS settings before each HLS command
 27 | # HLS = source $(VITIS_HLS_PATH)/settings64.sh && $(VITIS_HLS_PATH)/bin/vitis_hls
 28 | 
 29 | HLS_PROJECT = proj_$(DESIGN_NAME)
 30 | HLS_SOLUTION = solution1
 31 | 
 32 | # C++ compiler and flags for testbench (HLS headers: XILINX_HLS if set, else VITIS_HLS_PATH)
 33 | CXX = g++
 34 | CXXFLAGS = -Wall -Wextra -O2 -std=c++17
 35 | INCLUDES = -I$(if $(XILINX_HLS),$(XILINX_HLS),$(VITIS_HLS_PATH))/include
 36 | 
 37 | # Source files
 38 | SRC_FILES = $(DESIGN_NAME).cpp
 39 | TB_FILES = $(DESIGN_NAME)_tb.cpp
 40 | TEST_DATA_DIR = .
 41 | # Get a list of all text files in the data directory
 42 | TEST_DATA_FILES := $(wildcard $(TEST_DATA_DIR)/*.txt)
 43 | 
 44 | # Target names
 45 | TB_EXE = $(DESIGN_NAME)_test
 46 | CSIM_TCL = csim.tcl
 47 | CSYNTH_TCL = csynth.tcl
 48 | COSIM_TCL = cosim.tcl
 49 | EXPORT_TCL = export.tcl
 50 | IMPL_TCL = impl.tcl
 51 | 
 52 | # Calculate clock period in ns from MHz
 53 | CLOCK_PERIOD := $(shell echo "scale=2; 1000 / $(CLOCK_FREQ)" | bc)
 54 | 
 55 | .PHONY: all clean tb csim csynth cosim export_ip impl help
 56 | 
 57 | all: clean tb csim csynth
 58 | 
 59 | # Standalone testbench: compile the design and testbench sources with CXX, then run the executable
 60 | tb:
 61 | 	@echo "Building standalone testbench..."
 62 | 	$(CXX) $(CXXFLAGS) $(INCLUDES) $(SRC_FILES) $(TB_FILES) -o $(TB_EXE)
 63 | 	@echo "Running standalone testbench..."
 64 | 	./$(TB_EXE)
 65 | 
 66 | # HLS C Simulation: writes CSIM_TCL and runs it (top function follows DESIGN_NAME)
 67 | csim:
 68 | ifeq ($(CSIM), 1)
 69 | 	@echo "Running HLS C Simulation..."
 70 | 	@echo "open_project $(HLS_PROJECT)" > $(CSIM_TCL)
 71 | 	@echo "set_top $(DESIGN_NAME)" >> $(CSIM_TCL)
 72 | 	@echo "add_files $(SRC_FILES)" >> $(CSIM_TCL)
 73 | 	@echo "add_files -tb $(TB_FILES)" >> $(CSIM_TCL)
 74 | 	@for file in $(TEST_DATA_FILES); do \
 75 | 		echo "add_files -tb $$file" >> $(CSIM_TCL); \
 76 | 	done
 77 | 	@echo "open_solution $(HLS_SOLUTION)" >> $(CSIM_TCL)
 78 | 	@echo "set_part {$(FPGA_PART)}" >> $(CSIM_TCL)
 79 | 	@echo "csim_design" >> $(CSIM_TCL)
 80 | 	@echo "exit" >> $(CSIM_TCL)
 81 | 	$(HLS) -f $(CSIM_TCL)
 82 | endif
 83 | 
 84 | # HLS C Synthesis: writes CSYNTH_TCL and runs it (top function follows DESIGN_NAME)
 85 | csynth:
 86 | ifeq ($(CSYNTH), 1)
 87 | 	@echo "Running HLS C Synthesis..."
 88 | 	@echo "open_project $(HLS_PROJECT)" > $(CSYNTH_TCL)
 89 | 	@echo "set_top $(DESIGN_NAME)" >> $(CSYNTH_TCL)
 90 | 	@echo "add_files $(SRC_FILES)" >> $(CSYNTH_TCL)
 91 | 	@echo "add_files -tb $(TB_FILES)" >> $(CSYNTH_TCL)
 92 | 	@for file in $(TEST_DATA_FILES); do \
 93 | 		echo "add_files -tb $$file" >> $(CSYNTH_TCL); \
 94 | 	done
 95 | 	@echo "open_solution $(HLS_SOLUTION)" >> $(CSYNTH_TCL)
 96 | 	@echo "set_part {$(FPGA_PART)}" >> $(CSYNTH_TCL)
 97 | 	@echo "create_clock -period $(CLOCK_PERIOD) -name default" >> $(CSYNTH_TCL)
 98 | 	@echo "set_clock_uncertainty $(CLOCK_UNCERTAINTY)%" >> $(CSYNTH_TCL)
 99 | 	@echo "csynth_design" >> $(CSYNTH_TCL)
100 | 	@echo "exit" >> $(CSYNTH_TCL)
101 | 	$(HLS) -f $(CSYNTH_TCL)
102 | endif
103 | 
104 | # HLS C/RTL Co-simulation: writes COSIM_TCL and runs it (top function follows DESIGN_NAME)
105 | cosim:
106 | ifeq ($(COSIM), 1)
107 | 	@echo "Running HLS C/RTL Co-simulation..."
108 | 	@echo "open_project $(HLS_PROJECT)" > $(COSIM_TCL)
109 | 	@echo "set_top $(DESIGN_NAME)" >> $(COSIM_TCL)
110 | 	@echo "add_files $(SRC_FILES)" >> $(COSIM_TCL)
111 | 	@echo "add_files -tb $(TB_FILES)" >> $(COSIM_TCL)
112 | 	@for file in $(TEST_DATA_FILES); do \
113 | 		echo "add_files -tb $$file" >> $(COSIM_TCL); \
114 | 	done
115 | 	@echo "open_solution $(HLS_SOLUTION)" >> $(COSIM_TCL)
116 | 	@echo "set_part {$(FPGA_PART)}" >> $(COSIM_TCL)
117 | 	@echo "create_clock -period $(CLOCK_PERIOD) -name default" >> $(COSIM_TCL)
118 | 	@echo "set_clock_uncertainty $(CLOCK_UNCERTAINTY)%" >> $(COSIM_TCL)
119 | 	@echo "cosim_design" >> $(COSIM_TCL)
120 | 	@echo "exit" >> $(COSIM_TCL)
121 | 	$(HLS) -f $(COSIM_TCL)
122 | endif
123 | 
124 | # Export RTL as IP: writes EXPORT_TCL and runs it (top function follows DESIGN_NAME)
125 | export_ip:
126 | ifeq ($(EXPORT_IP), 1)
127 | 	@echo "Exporting IP..."
128 | 	@echo "open_project $(HLS_PROJECT)" > $(EXPORT_TCL)
129 | 	@echo "set_top $(DESIGN_NAME)" >> $(EXPORT_TCL)
130 | 	@echo "add_files $(SRC_FILES)" >> $(EXPORT_TCL)
131 | 	@echo "add_files -tb $(TB_FILES)" >> $(EXPORT_TCL)
132 | 	@for file in $(TEST_DATA_FILES); do \
133 | 		echo "add_files -tb $$file" >> $(EXPORT_TCL); \
134 | 	done
135 | 	@echo "open_solution $(HLS_SOLUTION)" >> $(EXPORT_TCL)
136 | 	@echo "set_part {$(FPGA_PART)}" >> $(EXPORT_TCL)
137 | 	@echo "create_clock -period $(CLOCK_PERIOD) -name default" >> $(EXPORT_TCL)
138 | 	@echo "set_clock_uncertainty $(CLOCK_UNCERTAINTY)%" >> $(EXPORT_TCL)
139 | 	@echo "export_design -format ip_catalog" >> $(EXPORT_TCL)
140 | 	@echo "exit" >> $(EXPORT_TCL)
141 | 	$(HLS) -f $(EXPORT_TCL)
142 | endif
143 | 
144 | # Run Implementation in Vivado: write $(IMPL_TCL) ending in "export_design -flow impl", then run it with $(HLS); does nothing unless VIVADO_IMPL is 1
145 | impl:
146 | ifeq ($(VIVADO_IMPL), 1)
147 | 	@echo "Running Vivado Implementation..."
148 | 	@echo "open_project $(HLS_PROJECT)" > $(IMPL_TCL)
149 | 	@echo "set_top peakPicker" >> $(IMPL_TCL)
150 | 	@echo "add_files $(SRC_FILES)" >> $(IMPL_TCL)
151 | 	@echo "add_files -tb $(TB_FILES)" >> $(IMPL_TCL)
152 | 	@for file in $(TEST_DATA_FILES); do \
153 | 		echo "add_files -tb $$file" >> $(IMPL_TCL); \
154 | 	done
155 | 	@echo "open_solution $(HLS_SOLUTION)" >> $(IMPL_TCL)
156 | 	@echo "set_part {$(FPGA_PART)}" >> $(IMPL_TCL)
157 | 	@echo "create_clock -period $(CLOCK_PERIOD) -name default" >> $(IMPL_TCL)
158 | 	@echo "set_clock_uncertainty $(CLOCK_UNCERTAINTY)%" >> $(IMPL_TCL)
159 | 	@echo "export_design -flow impl" >> $(IMPL_TCL)
160 | 	@echo "exit" >> $(IMPL_TCL)
161 | 	$(HLS) -f $(IMPL_TCL)
162 | endif
163 | 
164 | # Clean up: remove the HLS project directory, the testbench executable, and every *.dat, *.log, *.tcl, *.o and *.out file in the current directory (any hand-written .tcl here is deleted too)
165 | clean:
166 | 	@echo "Cleaning up..."
167 | 	rm -rf $(HLS_PROJECT) *.dat *.log *.tcl $(TB_EXE) *.o *.out
168 | 
169 | # Help information: print the available targets followed by the current values of the flow switches and hardware settings
170 | help:
171 | 	@echo "Makefile for HLS Peak Picker Project"
172 | 	@echo ""
173 | 	@echo "Targets:"
174 | 	@echo "  all        : Run clean, testbench, C simulation, and C synthesis"
175 | 	@echo "  tb         : Build and run standalone testbench"
176 | 	@echo "  csim       : Run HLS C simulation"
177 | 	@echo "  csynth     : Run HLS C synthesis"
178 | 	@echo "  cosim      : Run HLS C/RTL co-simulation"
179 | 	@echo "  export_ip  : Export RTL as IP catalog"
180 | 	@echo "  impl       : Run implementation in Vivado"
181 | 	@echo "  clean      : Clean up generated files"
182 | 	@echo "  help       : Display this help information"
183 | 	@echo ""
184 | 	@echo "Configuration (set to 1 to enable, 0 to disable):"
185 | 	@echo "  CSIM       = $(CSIM)  (C simulation)"
186 | 	@echo "  CSYNTH     = $(CSYNTH)  (C synthesis)"
187 | 	@echo "  COSIM      = $(COSIM)  (C/RTL co-simulation)"
188 | 	@echo "  EXPORT_IP  = $(EXPORT_IP)  (Export IP)"
189 | 	@echo "  VIVADO_IMPL= $(VIVADO_IMPL)  (Vivado implementation)"
190 | 	@echo ""
191 | 	@echo "Hardware Configuration:"
192 | 	@echo "  CLOCK_FREQ = $(CLOCK_FREQ)MHz (Clock frequency)"
193 | 	@echo "  CLOCK_PERIOD = $(CLOCK_PERIOD)ns (Clock period)"
194 | 	@echo "  FPGA_PART  = $(FPGA_PART)  (FPGA part)"
195 | 	@echo "  CLOCK_UNCERTAINTY = $(CLOCK_UNCERTAINTY)%  (Clock uncertainty)"
196 | 


--------------------------------------------------------------------------------
/scripts/Makefile:
--------------------------------------------------------------------------------
  1 | # Makefile for HLS Project: standalone g++ testbench plus Vitis HLS csim/csynth/cosim/export/impl flows driven by generated Tcl scripts
  2 | 
  3 | # Set the design name (drives the project, source, testbench and executable names below)
  4 | DESIGN_NAME = peakPicker
  5 | 
  6 | # Configuration variables: 1 enables the matching flow target, any other value turns it into a no-op
  7 | CSIM = 1
  8 | CSYNTH = 1
  9 | COSIM = 1
 10 | EXPORT_IP = 1
 11 | VIVADO_IMPL = 1
 12 | 
 13 | # Hardware configuration (CLOCK_FREQ in MHz, CLOCK_UNCERTAINTY in percent)
 14 | CLOCK_FREQ = 256
 15 | FPGA_PART = xc7k410t-ffg900-2
 16 | CLOCK_UNCERTAINTY = 12.5
 17 | 
 18 | # Vitis HLS installation path - modify this to match your installation
 19 | VITIS_HLS_PATH ?= /opt/Xilinx/Vitis_HLS/2023.2
 20 | # HLS compiler and flags - use full path to vitis_hls executable
 21 | HLS = $(VITIS_HLS_PATH)/bin/vitis_hls
 22 | 
 23 | # You can also set VITIS_HLS_PATH via environment variable before running make:
 24 | # export VITIS_HLS_PATH=/path/to/your/Vitis_HLS/installation
 25 | 
 26 | # Optional: Uncomment to source Vitis HLS settings before each HLS command (`source` is a bash builtin, so this may also need SHELL := /bin/bash)
 27 | # HLS = source $(VITIS_HLS_PATH)/settings64.sh && $(VITIS_HLS_PATH)/bin/vitis_hls
 28 | 
 29 | HLS_PROJECT = proj_$(DESIGN_NAME)
 30 | HLS_SOLUTION = solution1
 31 | 
 32 | # C++ compiler and flags for testbench; headers come from XILINX_HLS when the environment sets it, otherwise from VITIS_HLS_PATH
 33 | CXX = g++
 34 | CXXFLAGS = -Wall -Wextra -O2 -std=c++17
 35 | INCLUDES = -I$(or $(XILINX_HLS),$(VITIS_HLS_PATH))/include
 36 | 
 37 | # Source files
 38 | SRC_FILES = $(DESIGN_NAME).cpp
 39 | TB_FILES = $(DESIGN_NAME)_tb.cpp
 40 | TEST_DATA_DIR = ../../data
 41 | # Get a list of all text files in the data directory (expanded once, when the Makefile is read)
 42 | TEST_DATA_FILES := $(wildcard $(TEST_DATA_DIR)/*.txt)
 43 | 
 44 | # Target names: testbench executable and the Tcl script each flow target generates
 45 | TB_EXE = $(DESIGN_NAME)_test
 46 | CSIM_TCL = csim.tcl
 47 | CSYNTH_TCL = csynth.tcl
 48 | COSIM_TCL = cosim.tcl
 49 | EXPORT_TCL = export.tcl
 50 | IMPL_TCL = impl.tcl
 51 | 
 52 | # Calculate clock period in ns from MHz (requires bc; scale=2 truncates to two decimals, e.g. 256 MHz -> 3.90)
 53 | CLOCK_PERIOD := $(shell echo "scale=2; 1000 / $(CLOCK_FREQ)" | bc)
 54 | 
 55 | .PHONY: all clean tb csim csynth cosim export_ip impl help
 56 | 
 57 | all: clean tb csim csynth
 58 | 
 59 | # Standalone testbench using GCC: compile the design and testbench sources into $(TB_EXE) with $(CXX), then execute it
 60 | tb:
 61 | 	@echo "Building standalone testbench..."
 62 | 	$(CXX) $(CXXFLAGS) $(INCLUDES) $(SRC_FILES) $(TB_FILES) -o $(TB_EXE)
 63 | 	@echo "Running standalone testbench..."
 64 | 	./$(TB_EXE)
 65 | 
 66 | # HLS C Simulation: write $(CSIM_TCL) (top function taken from DESIGN_NAME), then run it with $(HLS); does nothing unless CSIM is 1
 67 | csim:
 68 | ifeq ($(CSIM), 1)
 69 | 	@echo "Running HLS C Simulation..."
 70 | 	@echo "open_project $(HLS_PROJECT)" > $(CSIM_TCL)
 71 | 	@echo "set_top $(DESIGN_NAME)" >> $(CSIM_TCL)
 72 | 	@echo "add_files $(SRC_FILES)" >> $(CSIM_TCL)
 73 | 	@echo "add_files -tb $(TB_FILES)" >> $(CSIM_TCL)
 74 | 	@for file in $(TEST_DATA_FILES); do \
 75 | 		echo "add_files -tb $$file" >> $(CSIM_TCL); \
 76 | 	done
 77 | 	@echo "open_solution $(HLS_SOLUTION)" >> $(CSIM_TCL)
 78 | 	@echo "set_part {$(FPGA_PART)}" >> $(CSIM_TCL)
 79 | 	@echo "csim_design" >> $(CSIM_TCL)
 80 | 	@echo "exit" >> $(CSIM_TCL)
 81 | 	$(HLS) -f $(CSIM_TCL)
 82 | endif
 83 | 
 84 | # HLS C Synthesis: write $(CSYNTH_TCL) (top function taken from DESIGN_NAME), then run it with $(HLS); does nothing unless CSYNTH is 1
 85 | csynth:
 86 | ifeq ($(CSYNTH), 1)
 87 | 	@echo "Running HLS C Synthesis..."
 88 | 	@echo "open_project $(HLS_PROJECT)" > $(CSYNTH_TCL)
 89 | 	@echo "set_top $(DESIGN_NAME)" >> $(CSYNTH_TCL)
 90 | 	@echo "add_files $(SRC_FILES)" >> $(CSYNTH_TCL)
 91 | 	@echo "add_files -tb $(TB_FILES)" >> $(CSYNTH_TCL)
 92 | 	@for file in $(TEST_DATA_FILES); do \
 93 | 		echo "add_files -tb $$file" >> $(CSYNTH_TCL); \
 94 | 	done
 95 | 	@echo "open_solution $(HLS_SOLUTION)" >> $(CSYNTH_TCL)
 96 | 	@echo "set_part {$(FPGA_PART)}" >> $(CSYNTH_TCL)
 97 | 	@echo "create_clock -period $(CLOCK_PERIOD) -name default" >> $(CSYNTH_TCL)
 98 | 	@echo "set_clock_uncertainty $(CLOCK_UNCERTAINTY)%" >> $(CSYNTH_TCL)
 99 | 	@echo "csynth_design" >> $(CSYNTH_TCL)
100 | 	@echo "exit" >> $(CSYNTH_TCL)
101 | 	$(HLS) -f $(CSYNTH_TCL)
102 | endif
103 | 
104 | # HLS C/RTL Co-simulation: write $(COSIM_TCL) (top function taken from DESIGN_NAME), then run it with $(HLS); does nothing unless COSIM is 1
105 | cosim:
106 | ifeq ($(COSIM), 1)
107 | 	@echo "Running HLS C/RTL Co-simulation..."
108 | 	@echo "open_project $(HLS_PROJECT)" > $(COSIM_TCL)
109 | 	@echo "set_top $(DESIGN_NAME)" >> $(COSIM_TCL)
110 | 	@echo "add_files $(SRC_FILES)" >> $(COSIM_TCL)
111 | 	@echo "add_files -tb $(TB_FILES)" >> $(COSIM_TCL)
112 | 	@for file in $(TEST_DATA_FILES); do \
113 | 		echo "add_files -tb $$file" >> $(COSIM_TCL); \
114 | 	done
115 | 	@echo "open_solution $(HLS_SOLUTION)" >> $(COSIM_TCL)
116 | 	@echo "set_part {$(FPGA_PART)}" >> $(COSIM_TCL)
117 | 	@echo "create_clock -period $(CLOCK_PERIOD) -name default" >> $(COSIM_TCL)
118 | 	@echo "set_clock_uncertainty $(CLOCK_UNCERTAINTY)%" >> $(COSIM_TCL)
119 | 	@echo "cosim_design" >> $(COSIM_TCL)
120 | 	@echo "exit" >> $(COSIM_TCL)
121 | 	$(HLS) -f $(COSIM_TCL)
122 | endif
123 | 
124 | # Export RTL as IP: write $(EXPORT_TCL) (top function taken from DESIGN_NAME) ending in "export_design -format ip_catalog", then run it with $(HLS); does nothing unless EXPORT_IP is 1
125 | export_ip:
126 | ifeq ($(EXPORT_IP), 1)
127 | 	@echo "Exporting IP..."
128 | 	@echo "open_project $(HLS_PROJECT)" > $(EXPORT_TCL)
129 | 	@echo "set_top $(DESIGN_NAME)" >> $(EXPORT_TCL)
130 | 	@echo "add_files $(SRC_FILES)" >> $(EXPORT_TCL)
131 | 	@echo "add_files -tb $(TB_FILES)" >> $(EXPORT_TCL)
132 | 	@for file in $(TEST_DATA_FILES); do \
133 | 		echo "add_files -tb $$file" >> $(EXPORT_TCL); \
134 | 	done
135 | 	@echo "open_solution $(HLS_SOLUTION)" >> $(EXPORT_TCL)
136 | 	@echo "set_part {$(FPGA_PART)}" >> $(EXPORT_TCL)
137 | 	@echo "create_clock -period $(CLOCK_PERIOD) -name default" >> $(EXPORT_TCL)
138 | 	@echo "set_clock_uncertainty $(CLOCK_UNCERTAINTY)%" >> $(EXPORT_TCL)
139 | 	@echo "export_design -format ip_catalog" >> $(EXPORT_TCL)
140 | 	@echo "exit" >> $(EXPORT_TCL)
141 | 	$(HLS) -f $(EXPORT_TCL)
142 | endif
143 | 
144 | # Run Implementation in Vivado: write $(IMPL_TCL) (top function taken from DESIGN_NAME) ending in "export_design -flow impl", then run it with $(HLS); does nothing unless VIVADO_IMPL is 1
145 | impl:
146 | ifeq ($(VIVADO_IMPL), 1)
147 | 	@echo "Running Vivado Implementation..."
148 | 	@echo "open_project $(HLS_PROJECT)" > $(IMPL_TCL)
149 | 	@echo "set_top $(DESIGN_NAME)" >> $(IMPL_TCL)
150 | 	@echo "add_files $(SRC_FILES)" >> $(IMPL_TCL)
151 | 	@echo "add_files -tb $(TB_FILES)" >> $(IMPL_TCL)
152 | 	@for file in $(TEST_DATA_FILES); do \
153 | 		echo "add_files -tb $$file" >> $(IMPL_TCL); \
154 | 	done
155 | 	@echo "open_solution $(HLS_SOLUTION)" >> $(IMPL_TCL)
156 | 	@echo "set_part {$(FPGA_PART)}" >> $(IMPL_TCL)
157 | 	@echo "create_clock -period $(CLOCK_PERIOD) -name default" >> $(IMPL_TCL)
158 | 	@echo "set_clock_uncertainty $(CLOCK_UNCERTAINTY)%" >> $(IMPL_TCL)
159 | 	@echo "export_design -flow impl" >> $(IMPL_TCL)
160 | 	@echo "exit" >> $(IMPL_TCL)
161 | 	$(HLS) -f $(IMPL_TCL)
162 | endif
163 | 
164 | # Clean up: remove the HLS project directory, the testbench executable, and every *.dat, *.log, *.tcl, *.o and *.out file in the current directory (any hand-written .tcl here is deleted too)
165 | clean:
166 | 	@echo "Cleaning up..."
167 | 	rm -rf $(HLS_PROJECT) *.dat *.log *.tcl $(TB_EXE) *.o *.out
168 | 
169 | # Help information: print the available targets followed by the current values of the flow switches and hardware settings
170 | help:
171 | 	@echo "Makefile for HLS Peak Picker Project"
172 | 	@echo ""
173 | 	@echo "Targets:"
174 | 	@echo "  all        : Run clean, testbench, C simulation, and C synthesis"
175 | 	@echo "  tb         : Build and run standalone testbench"
176 | 	@echo "  csim       : Run HLS C simulation"
177 | 	@echo "  csynth     : Run HLS C synthesis"
178 | 	@echo "  cosim      : Run HLS C/RTL co-simulation"
179 | 	@echo "  export_ip  : Export RTL as IP catalog"
180 | 	@echo "  impl       : Run implementation in Vivado"
181 | 	@echo "  clean      : Clean up generated files"
182 | 	@echo "  help       : Display this help information"
183 | 	@echo ""
184 | 	@echo "Configuration (set to 1 to enable, 0 to disable):"
185 | 	@echo "  CSIM       = $(CSIM)  (C simulation)"
186 | 	@echo "  CSYNTH     = $(CSYNTH)  (C synthesis)"
187 | 	@echo "  COSIM      = $(COSIM)  (C/RTL co-simulation)"
188 | 	@echo "  EXPORT_IP  = $(EXPORT_IP)  (Export IP)"
189 | 	@echo "  VIVADO_IMPL= $(VIVADO_IMPL)  (Vivado implementation)"
190 | 	@echo ""
191 | 	@echo "Hardware Configuration:"
192 | 	@echo "  CLOCK_FREQ = $(CLOCK_FREQ)MHz (Clock frequency)"
193 | 	@echo "  CLOCK_PERIOD = $(CLOCK_PERIOD)ns (Clock period)"
194 | 	@echo "  FPGA_PART  = $(FPGA_PART)  (FPGA part)"
195 | 	@echo "  CLOCK_UNCERTAINTY = $(CLOCK_UNCERTAINTY)%  (Clock uncertainty)"
196 | 


--------------------------------------------------------------------------------
/implementations/peakPicker/Makefile:
--------------------------------------------------------------------------------
  1 | # Makefile for HLS Project: standalone g++ testbench plus Vitis HLS csim/csynth/cosim/export/impl flows driven by generated Tcl scripts
  2 | 
  3 | # Set the design name (drives the project, source, testbench and executable names below)
  4 | DESIGN_NAME = peakPicker
  5 | 
  6 | # Configuration variables: 1 enables the matching flow target, any other value turns it into a no-op
  7 | CSIM = 1
  8 | CSYNTH = 1
  9 | COSIM = 1
 10 | EXPORT_IP = 1
 11 | VIVADO_IMPL = 1
 12 | 
 13 | # Hardware configuration (CLOCK_FREQ in MHz, CLOCK_UNCERTAINTY in percent)
 14 | CLOCK_FREQ = 256
 15 | FPGA_PART = xc7k410t-ffg900-2
 16 | CLOCK_UNCERTAINTY = 12.5
 17 | 
 18 | # Vitis HLS installation path - modify this to match your installation
 19 | VITIS_HLS_PATH ?= /opt/Xilinx/Vitis_HLS/2023.2
 20 | # HLS compiler and flags - use full path to vitis_hls executable
 21 | HLS = $(VITIS_HLS_PATH)/bin/vitis_hls
 22 | 
 23 | # You can also set VITIS_HLS_PATH via environment variable before running make:
 24 | # export VITIS_HLS_PATH=/path/to/your/Vitis_HLS/installation
 25 | 
 26 | # Optional: Uncomment to source Vitis HLS settings before each HLS command (`source` is a bash builtin, so this may also need SHELL := /bin/bash)
 27 | # HLS = source $(VITIS_HLS_PATH)/settings64.sh && $(VITIS_HLS_PATH)/bin/vitis_hls
 28 | 
 29 | HLS_PROJECT = proj_$(DESIGN_NAME)
 30 | HLS_SOLUTION = solution1
 31 | 
 32 | # C++ compiler and flags for testbench; headers come from XILINX_HLS when the environment sets it, otherwise from VITIS_HLS_PATH
 33 | CXX = g++
 34 | CXXFLAGS = -Wall -Wextra -O2 -std=c++17
 35 | INCLUDES = -I$(or $(XILINX_HLS),$(VITIS_HLS_PATH))/include
 36 | 
 37 | # Source files
 38 | SRC_FILES = $(DESIGN_NAME).cpp
 39 | TB_FILES = $(DESIGN_NAME)_tb.cpp
 40 | TEST_DATA_DIR = ../../data
 41 | # Get a list of all text files in the data directory (expanded once, when the Makefile is read)
 42 | TEST_DATA_FILES := $(wildcard $(TEST_DATA_DIR)/*.txt)
 43 | 
 44 | # Target names: testbench executable and the Tcl script each flow target generates
 45 | TB_EXE = $(DESIGN_NAME)_test
 46 | CSIM_TCL = csim.tcl
 47 | CSYNTH_TCL = csynth.tcl
 48 | COSIM_TCL = cosim.tcl
 49 | EXPORT_TCL = export.tcl
 50 | IMPL_TCL = impl.tcl
 51 | 
 52 | # Calculate clock period in ns from MHz (requires bc; scale=2 truncates to two decimals, e.g. 256 MHz -> 3.90)
 53 | CLOCK_PERIOD := $(shell echo "scale=2; 1000 / $(CLOCK_FREQ)" | bc)
 54 | 
 55 | .PHONY: all clean tb csim csynth cosim export_ip impl help
 56 | 
 57 | all: clean tb csim csynth
 58 | 
 59 | # Standalone testbench using GCC: compile the design and testbench sources into $(TB_EXE) with $(CXX), then execute it
 60 | tb:
 61 | 	@echo "Building standalone testbench..."
 62 | 	$(CXX) $(CXXFLAGS) $(INCLUDES) $(SRC_FILES) $(TB_FILES) -o $(TB_EXE)
 63 | 	@echo "Running standalone testbench..."
 64 | 	./$(TB_EXE)
 65 | 
 66 | # HLS C Simulation: write $(CSIM_TCL) (top function taken from DESIGN_NAME), then run it with $(HLS); does nothing unless CSIM is 1
 67 | csim:
 68 | ifeq ($(CSIM), 1)
 69 | 	@echo "Running HLS C Simulation..."
 70 | 	@echo "open_project $(HLS_PROJECT)" > $(CSIM_TCL)
 71 | 	@echo "set_top $(DESIGN_NAME)" >> $(CSIM_TCL)
 72 | 	@echo "add_files $(SRC_FILES)" >> $(CSIM_TCL)
 73 | 	@echo "add_files -tb $(TB_FILES)" >> $(CSIM_TCL)
 74 | 	@for file in $(TEST_DATA_FILES); do \
 75 | 		echo "add_files -tb $$file" >> $(CSIM_TCL); \
 76 | 	done
 77 | 	@echo "open_solution $(HLS_SOLUTION)" >> $(CSIM_TCL)
 78 | 	@echo "set_part {$(FPGA_PART)}" >> $(CSIM_TCL)
 79 | 	@echo "csim_design" >> $(CSIM_TCL)
 80 | 	@echo "exit" >> $(CSIM_TCL)
 81 | 	$(HLS) -f $(CSIM_TCL)
 82 | endif
 83 | 
 84 | # HLS C Synthesis: write $(CSYNTH_TCL) (top function taken from DESIGN_NAME), then run it with $(HLS); does nothing unless CSYNTH is 1
 85 | csynth:
 86 | ifeq ($(CSYNTH), 1)
 87 | 	@echo "Running HLS C Synthesis..."
 88 | 	@echo "open_project $(HLS_PROJECT)" > $(CSYNTH_TCL)
 89 | 	@echo "set_top $(DESIGN_NAME)" >> $(CSYNTH_TCL)
 90 | 	@echo "add_files $(SRC_FILES)" >> $(CSYNTH_TCL)
 91 | 	@echo "add_files -tb $(TB_FILES)" >> $(CSYNTH_TCL)
 92 | 	@for file in $(TEST_DATA_FILES); do \
 93 | 		echo "add_files -tb $$file" >> $(CSYNTH_TCL); \
 94 | 	done
 95 | 	@echo "open_solution $(HLS_SOLUTION)" >> $(CSYNTH_TCL)
 96 | 	@echo "set_part {$(FPGA_PART)}" >> $(CSYNTH_TCL)
 97 | 	@echo "create_clock -period $(CLOCK_PERIOD) -name default" >> $(CSYNTH_TCL)
 98 | 	@echo "set_clock_uncertainty $(CLOCK_UNCERTAINTY)%" >> $(CSYNTH_TCL)
 99 | 	@echo "csynth_design" >> $(CSYNTH_TCL)
100 | 	@echo "exit" >> $(CSYNTH_TCL)
101 | 	$(HLS) -f $(CSYNTH_TCL)
102 | endif
103 | 
104 | # HLS C/RTL Co-simulation: write $(COSIM_TCL) (top function taken from DESIGN_NAME), then run it with $(HLS); does nothing unless COSIM is 1
105 | cosim:
106 | ifeq ($(COSIM), 1)
107 | 	@echo "Running HLS C/RTL Co-simulation..."
108 | 	@echo "open_project $(HLS_PROJECT)" > $(COSIM_TCL)
109 | 	@echo "set_top $(DESIGN_NAME)" >> $(COSIM_TCL)
110 | 	@echo "add_files $(SRC_FILES)" >> $(COSIM_TCL)
111 | 	@echo "add_files -tb $(TB_FILES)" >> $(COSIM_TCL)
112 | 	@for file in $(TEST_DATA_FILES); do \
113 | 		echo "add_files -tb $$file" >> $(COSIM_TCL); \
114 | 	done
115 | 	@echo "open_solution $(HLS_SOLUTION)" >> $(COSIM_TCL)
116 | 	@echo "set_part {$(FPGA_PART)}" >> $(COSIM_TCL)
117 | 	@echo "create_clock -period $(CLOCK_PERIOD) -name default" >> $(COSIM_TCL)
118 | 	@echo "set_clock_uncertainty $(CLOCK_UNCERTAINTY)%" >> $(COSIM_TCL)
119 | 	@echo "cosim_design" >> $(COSIM_TCL)
120 | 	@echo "exit" >> $(COSIM_TCL)
121 | 	$(HLS) -f $(COSIM_TCL)
122 | endif
123 | 
124 | # Export RTL as IP: write $(EXPORT_TCL) (top function taken from DESIGN_NAME) ending in "export_design -format ip_catalog", then run it with $(HLS); does nothing unless EXPORT_IP is 1
125 | export_ip:
126 | ifeq ($(EXPORT_IP), 1)
127 | 	@echo "Exporting IP..."
128 | 	@echo "open_project $(HLS_PROJECT)" > $(EXPORT_TCL)
129 | 	@echo "set_top $(DESIGN_NAME)" >> $(EXPORT_TCL)
130 | 	@echo "add_files $(SRC_FILES)" >> $(EXPORT_TCL)
131 | 	@echo "add_files -tb $(TB_FILES)" >> $(EXPORT_TCL)
132 | 	@for file in $(TEST_DATA_FILES); do \
133 | 		echo "add_files -tb $$file" >> $(EXPORT_TCL); \
134 | 	done
135 | 	@echo "open_solution $(HLS_SOLUTION)" >> $(EXPORT_TCL)
136 | 	@echo "set_part {$(FPGA_PART)}" >> $(EXPORT_TCL)
137 | 	@echo "create_clock -period $(CLOCK_PERIOD) -name default" >> $(EXPORT_TCL)
138 | 	@echo "set_clock_uncertainty $(CLOCK_UNCERTAINTY)%" >> $(EXPORT_TCL)
139 | 	@echo "export_design -format ip_catalog" >> $(EXPORT_TCL)
140 | 	@echo "exit" >> $(EXPORT_TCL)
141 | 	$(HLS) -f $(EXPORT_TCL)
142 | endif
143 | 
144 | # Run Implementation in Vivado: write $(IMPL_TCL) (top function taken from DESIGN_NAME) ending in "export_design -flow impl", then run it with $(HLS); does nothing unless VIVADO_IMPL is 1
145 | impl:
146 | ifeq ($(VIVADO_IMPL), 1)
147 | 	@echo "Running Vivado Implementation..."
148 | 	@echo "open_project $(HLS_PROJECT)" > $(IMPL_TCL)
149 | 	@echo "set_top $(DESIGN_NAME)" >> $(IMPL_TCL)
150 | 	@echo "add_files $(SRC_FILES)" >> $(IMPL_TCL)
151 | 	@echo "add_files -tb $(TB_FILES)" >> $(IMPL_TCL)
152 | 	@for file in $(TEST_DATA_FILES); do \
153 | 		echo "add_files -tb $$file" >> $(IMPL_TCL); \
154 | 	done
155 | 	@echo "open_solution $(HLS_SOLUTION)" >> $(IMPL_TCL)
156 | 	@echo "set_part {$(FPGA_PART)}" >> $(IMPL_TCL)
157 | 	@echo "create_clock -period $(CLOCK_PERIOD) -name default" >> $(IMPL_TCL)
158 | 	@echo "set_clock_uncertainty $(CLOCK_UNCERTAINTY)%" >> $(IMPL_TCL)
159 | 	@echo "export_design -flow impl" >> $(IMPL_TCL)
160 | 	@echo "exit" >> $(IMPL_TCL)
161 | 	$(HLS) -f $(IMPL_TCL)
162 | endif
163 | 
164 | # Clean up: remove the HLS project directory, the testbench executable, and every *.dat, *.log, *.tcl, *.o and *.out file in the current directory (any hand-written .tcl here is deleted too)
165 | clean:
166 | 	@echo "Cleaning up..."
167 | 	rm -rf $(HLS_PROJECT) *.dat *.log *.tcl $(TB_EXE) *.o *.out
168 | 
169 | # Help information: print the available targets followed by the current values of the flow switches and hardware settings
170 | help:
171 | 	@echo "Makefile for HLS Peak Picker Project"
172 | 	@echo ""
173 | 	@echo "Targets:"
174 | 	@echo "  all        : Run clean, testbench, C simulation, and C synthesis"
175 | 	@echo "  tb         : Build and run standalone testbench"
176 | 	@echo "  csim       : Run HLS C simulation"
177 | 	@echo "  csynth     : Run HLS C synthesis"
178 | 	@echo "  cosim      : Run HLS C/RTL co-simulation"
179 | 	@echo "  export_ip  : Export RTL as IP catalog"
180 | 	@echo "  impl       : Run implementation in Vivado"
181 | 	@echo "  clean      : Clean up generated files"
182 | 	@echo "  help       : Display this help information"
183 | 	@echo ""
184 | 	@echo "Configuration (set to 1 to enable, 0 to disable):"
185 | 	@echo "  CSIM       = $(CSIM)  (C simulation)"
186 | 	@echo "  CSYNTH     = $(CSYNTH)  (C synthesis)"
187 | 	@echo "  COSIM      = $(COSIM)  (C/RTL co-simulation)"
188 | 	@echo "  EXPORT_IP  = $(EXPORT_IP)  (Export IP)"
189 | 	@echo "  VIVADO_IMPL= $(VIVADO_IMPL)  (Vivado implementation)"
190 | 	@echo ""
191 | 	@echo "Hardware Configuration:"
192 | 	@echo "  CLOCK_FREQ = $(CLOCK_FREQ)MHz (Clock frequency)"
193 | 	@echo "  CLOCK_PERIOD = $(CLOCK_PERIOD)ns (Clock period)"
194 | 	@echo "  FPGA_PART  = $(FPGA_PART)  (FPGA part)"
195 | 	@echo "  CLOCK_UNCERTAINTY = $(CLOCK_UNCERTAINTY)%  (Clock uncertainty)"
196 | 


--------------------------------------------------------------------------------
/prompts/readme_generation.md:
--------------------------------------------------------------------------------
  1 | # README Generation Prompt
  2 | 
  3 | ## Context
  4 | You are tasked with creating a comprehensive README document for an FPGA hardware accelerator design. This document will be the primary reference for users, developers, and stakeholders who interact with this hardware component.
  5 | 
  6 | ## Component Overview
  7 | - **Component Name**: {component_name}
  8 | - **Design Purpose**: A hardware accelerator implemented on an FPGA
  9 | - **Generation Method**: AI-assisted design using LLM ({generation_model})
 10 | - **Target Platform**: Xilinx FPGA ({fpga_part})
 11 | 
 12 | ## Instructions
 13 | 
 14 | Create a comprehensive README.md file following this structure:
 15 | 
 16 | ### 1. Title and Introduction
 17 | - Clear title with component name
 18 | - Brief overview of what the component does
 19 | - Key features and capabilities
 20 | - Target applications
 21 | 
 22 | ### 2. Hardware Architecture
 23 | - High-level block diagram description
 24 | - Key architectural components
 25 | - Data flow explanation
 26 | - Interface specifications
 27 | - Include design decisions and their rationales
 28 | 
 29 | **Architecture Visualization**: Include a Mermaid flowchart diagram showing the main components and data flow. Example:
 30 | 
 31 | ```mermaid
 32 | flowchart TD
 33 |     A["Input Interface"] --> B["Core Processing Logic"]
 34 |     B --> C["Output Interface"]
 35 |     B --> D["Control Unit"]
 36 |     E["Memory"] <--> B
 37 | ```
 38 | 
 39 | ### 3. Implementation Details
 40 | - HLS directives and optimizations used
 41 | - Resource utilization (LUTs, FFs, DSPs, BRAMs)
 42 | - Critical design parameters
 43 | - Key algorithms and their hardware mapping
 44 | 
 45 | **Algorithm Visualization**: Include a Mermaid flowchart or sequence diagram showing the algorithm implementation. Example of algorithm flowchart:
 46 | 
 47 | ```mermaid
 48 | flowchart LR
 49 |     A["Input Data"] --> B["Stage 1: Preprocessing"]
 50 |     B --> C["Stage 2: Computation"]
 51 |     C --> D["Stage 3: Postprocessing"]
 52 |     D --> E["Output Result"]
 53 |     
 54 |     subgraph "Core Algorithm"
 55 |     B
 56 |     C
 57 |     D
 58 |     end
 59 | ```
 60 | 
 61 | ### 4. Performance Metrics
 62 | - Latency (in cycles)
 63 | - Throughput
 64 | - Clock frequency
 65 | - Resource efficiency
 66 | - Comparative analysis against baseline if available
 67 | 
 68 | **Performance Visualization**: Present performance metrics in clear tables and include a state diagram if applicable. Example:
 69 | 
 70 | | Metric         | Value    | Unit       |
 71 | |----------------|----------|------------|
 72 | | Latency        | X        | cycles     |
 73 | | Clock Period   | Y        | ns         |
 74 | | Throughput     | Z        | items/cycle|
 75 | | Resource Usage | See table below |     |
 76 | 
 77 | ## Resource Utilization
 78 | 
 79 | | Resource | Utilization | Available | Utilization % |
 80 | |----------|-------------|-----------|---------------|
 81 | | LUT      | X           | X_total   | X_percent     |
 82 | | FF       | Y           | Y_total   | Y_percent     |
 83 | | DSP      | Z           | Z_total   | Z_percent     |
 84 | | BRAM     | W           | W_total   | W_percent     |
 85 | 
 86 | ## Timing
 87 | 
 88 | | Implementation | Target (ns) | Target (MHz) | Post-Synthesis (ns) | Post-Synthesis (MHz) | Post-Route (ns) | Post-Route (MHz) |
 89 | |---------------|------------|-------------|-------------------|---------------------|----------------|----------------|
 90 | | solution1 | 3.90 | 256.00 | u.uu | vvv.vv | x.xx | yyy.yy |
 91 | 
 92 | ## Latency
 93 | 
 94 | | Implementation | Min (cycles) | Max (cycles) | Average (cycles) | Throughput (samples/cycle) |
 95 | |---------------|-------------|-------------|-----------------|-----------------------------|
 96 | | solution1 | X | Y | - | - |
 97 | 
 98 | 
 99 | For state machines, use:
100 | 
101 | ```mermaid
102 | stateDiagram-v2
103 |     [*] --> Idle
104 |     Idle --> Processing: "start_signal"
105 |     Processing --> Done: "processing_complete"
106 |     Done --> Idle: "reset"
107 | ```
108 | 
109 | ### 5. Setup and Usage
110 | - Prerequisites (tools, versions)
111 | - Build instructions
112 | - Integration guidance
113 | - Testbench explanation
114 | - Common usage patterns
115 | - API documentation if applicable
116 | 
117 | **Setup Visualization**: If applicable, include a sequence diagram showing the setup and usage flow:
118 | 
119 | ```mermaid
120 | sequenceDiagram
121 |     participant User
122 |     participant Build System
123 |     participant FPGA
124 |     
125 |     User->>Build System: "Run build script"
126 |     Build System->>FPGA: "Generate bitstream"
127 |     FPGA-->>Build System: "Bitstream ready"
128 |     Build System-->>User: "Build complete"
129 |     User->>FPGA: "Load design"
130 |     User->>FPGA: "Send data"
131 |     FPGA-->>User: "Return results"
132 | ```
133 | 
134 | ### 6. Results and Validation
135 | - Verification methodology
136 | - Simulation results
137 | - Hardware testing results if available
138 | - Performance validation
139 | 
140 | **Results Visualization**: Present validation results in tables and comparison charts where applicable.
141 | 
142 | ### 7. Development History
143 | - Design evolution
144 | - Challenges encountered and their solutions
145 | - Optimization iterations
146 | - AI assistance insights
147 | 
148 | ### 8. Future Work
149 | - Potential improvements
150 | - Scaling opportunities
151 | - Additional features
152 | 
153 | ## Source Information
154 | Use the following source files and metrics to inform your documentation:
155 | 
156 | ### Source Code
157 | ```cpp
158 | // Header file ({component_name}.hpp)
159 | {header_code}
160 | ```
161 | 
162 | ```cpp
163 | // Implementation file ({component_name}.cpp)
164 | {implementation_code}
165 | ```
166 | 
167 | ```cpp
168 | // Testbench file ({component_name}_tb.cpp)
169 | {testbench_code}
170 | ```
171 | 
172 | ### Performance Metrics
173 | {performance_metrics}
174 | 
175 | ### Implementation Challenges
176 | {errors_encountered}
177 | 
178 | ### Debugging Methods
179 | {debugging_methods}
180 | 
181 | ## Diagram Examples
182 | The following are examples of different types of Mermaid diagrams you can use:
183 | 
184 | {diagram_examples}
185 | 
186 | ## Chart Examples
187 | The following are examples of different types of tables/charts for performance data:
188 | 
189 | {chart_examples}
190 | 
191 | ## Style Guidelines
192 | - Use clear, technical language appropriate for engineering documentation
193 | - Include code snippets where helpful
194 | - Use markdown formatting features (headers, lists, tables, code blocks)
195 | - Be concise but comprehensive
196 | - Focus on practical usage and technical details
197 | - Highlight AI-assisted aspects of the development process
198 | - Maintain a professional tone
199 | - Make effective use of diagrams and visualizations for clarity
200 | - Use Mermaid diagrams for architecture, data flow, and algorithms
201 | - Use tables to present performance metrics and comparative analysis
202 | 
203 | Your README should serve as both a technical reference and a guide for someone who wants to understand, use, or modify the hardware component.
204 | 
205 | ## BEST PRACTICES
206 | 
207 | - Successfully generated documentation on 2025-04-06
208 | - Successfully generated documentation on 2025-04-06
209 | - Successfully generated documentation on 2025-04-06
210 | - Successfully generated documentation on 2025-04-06
211 | - Successfully generated documentation on 2025-04-06
212 | - Successfully generated documentation on 2025-04-06
213 | - Successfully generated documentation on 2025-04-06
214 | - Successfully generated documentation on 2025-04-06
215 | - Successfully generated documentation on 2025-04-06
216 | 


--------------------------------------------------------------------------------
/prompts/paper_generation.md:
--------------------------------------------------------------------------------
  1 | # Academic Paper Generation Prompt
  2 | 
  3 | ## Context
  4 | You are tasked with writing an academic research paper about an FPGA hardware accelerator design that was developed using an AI-assisted design methodology. This paper should follow academic standards and contribute to the literature on hardware acceleration and AI-assisted design.
  5 | 
  6 | ## Component Information
  7 | - **Component Name**: {component_name}
  8 | - **LLM Used for Generation**: {generation_model}
  9 | - **Target FPGA Platform**: {fpga_part}
 10 | - **Domain**: Hardware Acceleration for Digital Signal Processing/Machine Learning/etc.
 11 | 
 12 | ## Paper Structure Requirements
 13 | 
 14 | Create a complete academic paper in markdown format with the following structure:
 15 | 
 16 | ### 1. Title and Authors
 17 | - Create an appropriate academic title for this work
 18 | - List authors as the research team (placeholder)
 19 | - Include institutional affiliation
 20 | 
 21 | ### 2. Abstract (200-250 words)
 22 | - Summarize the paper's content
 23 | - State the problem addressed
 24 | - Describe the approach using AI-assisted design
 25 | - Highlight key results and contributions
 26 | - Mention broader impact
 27 | 
 28 | ### 3. Introduction
 29 | - Context and background of the problem
 30 | - Motivation for hardware acceleration
 31 | - Challenges in traditional FPGA design
 32 | - Introduction to AI-assisted hardware design
 33 | - Contribution statement
 34 | - Paper organization
 35 | 
 36 | ### 4. Related Work (2-3 subsections)
 37 | - Prior work on hardware acceleration for similar applications
 38 | - Previous research on automated HLS design
 39 | - AI-assisted hardware design methodologies
 40 | - Positioning of current work within literature
 41 | 
 42 | ### 5. Methodology
 43 | - Overall design approach
 44 | - AI-assisted design workflow description
 45 | - Prompt engineering for hardware generation
 46 | - Iteration and refinement process
 47 | - Verification methodology
 48 | 
 49 | **Workflow Visualization**: Include a Mermaid diagram showing the AI-assisted design workflow. Example:
 50 | 
 51 | ```mermaid
 52 | flowchart TD
 53 |     A["Problem Definition"] --> B["Prompt Engineering"]
 54 |     B --> C["LLM Code Generation"]
 55 |     C --> D["Code Verification"]
 56 |     D -->|"Errors"| E["Debugging"]
 57 |     E --> C
 58 |     D -->|"Success"| F["Implementation"]
 59 |     F --> G["Performance Analysis"]
 60 |     G -->|"Optimization Needed"| H["Optimization Prompts"]
 61 |     H --> C
 62 |     G -->|"Acceptable"| I["Final Design"]
 63 | ```
 64 | 
 65 | ### 6. Design Architecture
 66 | - System-level architecture
 67 | - Component interfaces and data flow
 68 | - Key algorithmic components
 69 | - Design constraints and considerations
 70 | - HLS implementation details
 71 | - Optimizations applied
 72 | 
 73 | **Architecture Visualization**: Include a detailed Mermaid diagram showing the system architecture and data flow. Example:
 74 | 
 75 | ```mermaid
 76 | flowchart LR
 77 |     A["External Input"] --> B["Input Interface"]
 78 |     B --> C["Processing Module"]
 79 |     
 80 |     subgraph "Core Accelerator"
 81 |     C --> D["Algorithm Stage 1"]
 82 |     D --> E["Algorithm Stage 2"]
 83 |     E --> F["Algorithm Stage 3"]
 84 |     end
 85 |     
 86 |     F --> G["Output Interface"]
 87 |     G --> H["External Output"]
 88 |     
 89 |     I["Control Logic"] --> C
 90 |     I --> D
 91 |     I --> E
 92 |     I --> F
 93 | ```
 94 | 
 95 | ### 7. Implementation
 96 | - HLS directives and pragmas
 97 | - Resource allocation strategies
 98 | - Pipelining and parallelism exploitation
 99 | - Memory architecture and data movement
100 | - Critical path analysis
101 | 
102 | **Implementation Visualization**: Include a Mermaid diagram showing key optimization strategies or pipeline structure. Example:
103 | 
104 | ```mermaid
105 | gantt
106 |     title Pipeline Structure
107 |     dateFormat s
108 |     axisFormat %S
109 |     
110 |     section Without Pipelining
111 |     Stage 1      :a1, 0, 3s
112 |     Stage 2      :a2, after a1, 2s
113 |     Stage 3      :a3, after a2, 2s
114 |     
115 |     section With Pipelining
116 |     Stage 1 (Iter 1)  :b1, 0, 3s
117 |     Stage 2 (Iter 1)  :b2, after b1, 2s
118 |     Stage 1 (Iter 2)  :b3, after b1, 3s
119 |     Stage 3 (Iter 1)  :b4, after b2, 2s
120 |     Stage 2 (Iter 2)  :b5, after b3, 2s
121 | ```
122 | 
123 | ### 8. Experimental Results
124 | - Experimental setup
125 | - Performance metrics:
126 |   - Resource utilization
127 |   - Timing and frequency
128 |   - Latency and throughput
129 |   - Power consumption (if available)
130 | - Comparative analysis with:
131 |   - Manual implementations
132 |   - Other automated approaches
133 |   - Software-only solutions
134 | - Discussion of results
135 | 
136 | **Results Visualization**: Present results in clear tables and comparison charts:
137 | 
138 | ```
139 | | Implementation | LUTs | FFs | DSPs | BRAMs | Clock Freq (MHz) | Latency (cycles) |
140 | |----------------|------|-----|------|-------|------------------|------------------|
141 | | Our Work       | X    | Y   | Z    | W     | F                | L                |
142 | | Baseline 1     | X1   | Y1  | Z1   | W1    | F1               | L1               |
143 | | Baseline 2     | X2   | Y2  | Z2   | W2    | F2               | L2               |
144 | ```
145 | 
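146 | Consider including Mermaid charts for comparative analysis (replace the X/Y/Z/W placeholders with the measured numeric values; Mermaid pie slices must be numbers to render):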
147 | 
148 | ```mermaid
149 | pie title Resource Distribution
150 |     "LUTs" : X
151 |     "FFs" : Y
152 |     "DSPs" : Z
153 |     "BRAMs" : W
154 | ```
155 | 
156 | ### 9. Analysis of AI-Assisted Design Process
157 | - Analysis of LLM strengths/weaknesses in hardware design
158 | - Error patterns and resolution strategies
159 | - Human-AI collaboration insights
160 | - Design quality assessment
161 | - Development efficiency metrics
162 | 
163 | **Process Visualization**: Show the iteration process and error resolution:
164 | 
165 | ```mermaid
166 | stateDiagram-v2
167 |     [*] --> Prompt
168 |     Prompt --> Generation
169 |     Generation --> Verification
170 |     Verification --> Success
171 |     Verification --> Errors
172 |     Errors --> Analysis
173 |     Analysis --> Refinement
174 |     Refinement --> Prompt
175 |     Success --> [*]
176 | ```
177 | 
178 | ### 10. Discussion
179 | - Interpretation of results
180 | - Limitations of the approach
181 | - Generalizability of the methodology
182 | - Trade-offs identified
183 | - Lessons learned
184 | 
185 | ### 11. Future Work
186 | - Potential improvements
187 | - Broader applications
188 | - Research directions
189 | - Scaling to more complex designs
190 | 
191 | ### 12. Conclusion
192 | - Summary of contributions
193 | - Key findings
194 | - Broader impact
195 | - Closing thoughts
196 | 
197 | ### 13. References
198 | - Include relevant references to:
199 |   - Hardware acceleration literature
200 |   - High-Level Synthesis research
201 |   - AI in design automation
202 |   - Relevant applications
203 |   - Methodological papers
204 | 
205 | ## Source Information
206 | Use the following source information to inform your paper:
207 | 
208 | ### Source Code Architecture
209 | ```cpp
210 | // Header file ({component_name}.hpp)
211 | {header_code}
212 | ```
213 | 
214 | ```cpp
215 | // Implementation file ({component_name}.cpp)
216 | {implementation_code}
217 | ```
218 | 
219 | ### Performance Metrics
220 | {performance_metrics}
221 | 
222 | ### Design Process
223 | - AI Generation Model: {generation_model}
224 | - Workflow Steps: {workflow_steps}
225 | - Successful Steps: {successful_steps}
226 | - Error Steps: {error_steps}
227 | 
228 | ### Implementation Challenges
229 | {errors_encountered}
230 | 
231 | ### Debugging Methods
232 | {debugging_methods}
233 | 
234 | ## Diagram Examples
235 | The following are examples of different types of Mermaid diagrams you can use:
236 | 
237 | {diagram_examples}
238 | 
239 | ## Chart Examples
240 | The following are examples of different types of tables/charts for performance data:
241 | 
242 | {chart_examples}
243 | 
244 | ## Academic Style Guidelines
245 | - Use formal academic language
246 | - Present objective analysis of results
247 | - Support claims with data from implementation
248 | - Discuss limitations honestly
249 | - Position work in relation to existing literature
250 | - Use the third person (avoid "I", "we", "you")
251 | - Maintain scientific rigor throughout
252 | - Use passive voice where appropriate
253 | - Include a balanced mix of technical details and higher-level discussion
254 | - Present results visually through diagrams, charts and tables
255 | - Use Mermaid diagrams for architecture, methodology, and algorithmic visualizations
256 | - Present performance results in tables with comparative analysis
257 | 
258 | Your paper should contribute to the academic discourse on AI-assisted hardware design while presenting concrete technical achievements and insights.
259 | 
260 | ## BEST PRACTICES
261 | - Successfully generated documentation on 2025-04-06
262 | 


--------------------------------------------------------------------------------
/implementations/peakPicker/peakPicker_tb.cpp:
--------------------------------------------------------------------------------
  1 | /* AUTO-EDITED BY DEBUG ASSISTANT */
  2 | #include "peakPicker.hpp"
  3 | #include <iostream>
  4 | #include <fstream>
  5 | #include <vector>
  6 | #include <string>
  7 | #include <cmath>    // For std::abs
  8 | #include <limits>   // For numeric_limits
  9 | #include <iomanip>  // For std::setprecision
 10 | 
 11 | // Data files read by the testbench; bare names, so they are opened relative to the current working directory
 12 | const std::string XCORR_INPUT_FILE = "pssCorrMagSq_3_in.txt"; // xcorr input samples, parsed as doubles (name matches MATLAB TB)
 13 | const std::string THRESHOLD_INPUT_FILE = "threshold_in.txt"; // threshold input samples, parsed as doubles (name matches MATLAB TB)
 14 | const std::string REF_OUTPUT_FILE = "locations_3_ref.txt";   // expected peak locations, parsed as ints (name matches MATLAB TB)
 15 | // const std::string REF_OUTPUT_FILE = "peakLocs_out.txt"; // Alternative reference file name
 16 | 
 17 | // Reads whitespace-separated doubles from `filename` and appends them to `data` (not cleared first). Returns false if the file cannot be opened or the stream reports badbit; a malformed token only produces a warning and still returns true.
 18 | bool readDataFile(const std::string& filename, std::vector<double>& data) {
 19 |     std::ifstream infile(filename);
 20 |     if (!infile.is_open()) {
 21 |         std::cerr << "Error: Could not open file: " << filename << std::endl;
 22 |         return false;
 23 |     }
 24 |     double value;
 25 |     while (infile >> value) {
 26 |         data.push_back(value);
 27 |     }
 28 |     // badbit signals an unrecoverable stream/I-O error; a non-numeric token sets failbit instead and is handled below
 29 |     if (infile.bad()) {
 30 |         std::cerr << "Error: Failed reading data from file: " << filename << std::endl;
 31 |         infile.close();
 32 |         return false;
 33 |     }
 34 |     // Distinguish a clean end-of-file from an extraction failure that stopped the loop early
 35 |     // infile.fail() is true whenever >> extracted nothing, which includes the final attempt at EOF
 36 |     // infile.eof() is true if >> ran into end-of-file, so fail-without-eof means extraction failed before EOF (e.g. a malformed token)
 37 |     if (!infile.eof() && infile.fail()) {
 38 |          std::cerr << "Warning: Input format error suspected in file: " << filename << std::endl;
 39 |          // Not fatal: values parsed before the failure are kept and the function still returns true
 40 |     }
 41 |     infile.close();
 42 |     if (data.empty() && !infile.eof()) { // NOTE(review): state flags survive close(), so this fires only when nothing was stored AND extraction stopped before EOF; a truly empty file (eofbit set) does not trigger it - confirm intent
 43 |        std::cerr << "Warning: No data read from file (or file empty): " << filename << std::endl;
 44 |     }
 45 |     std::cout << "Read " << data.size() << " values from " << filename << std::endl;
 46 |     return true;
 47 | }
 48 | 
 49 | // Reads whitespace-separated ints (peak locations) from `filename` and appends them to `data` (not cleared first). Returns false if the file cannot be opened or the stream reports badbit; a malformed token only produces a warning and still returns true.
 50 | bool readIntDataFile(const std::string& filename, std::vector<int>& data) {
 51 |     std::ifstream infile(filename);
 52 |     if (!infile.is_open()) {
 53 |         std::cerr << "Error: Could not open file: " << filename << std::endl;
 54 |         return false;
 55 |     }
 56 |     int value;
 57 |     while (infile >> value) {
 58 |         data.push_back(value);
 59 |     }
 60 |      // badbit signals an unrecoverable stream/I-O error; a non-numeric token sets failbit instead and is handled below
 61 |     if (infile.bad()) {
 62 |         std::cerr << "Error: Failed reading data from file: " << filename << std::endl;
 63 |         infile.close();
 64 |         return false;
 65 |     }
 66 |     // fail-without-eof means extraction stopped before end-of-file (e.g. a malformed token); fail() alone is also set by the final attempt at EOF
 67 |     if (!infile.eof() && infile.fail()) {
 68 |          std::cerr << "Warning: Input format error suspected in file: " << filename << std::endl;
 69 |          // Not fatal: values parsed before the failure are kept and the function still returns true
 70 |     }
 71 |     infile.close();
 72 |      if (data.empty() && !infile.eof()) { // NOTE(review): state flags survive close(), so this fires only when nothing was stored AND extraction stopped before EOF; a truly empty file (eofbit set) does not trigger it - confirm intent
 73 |        std::cerr << "Warning: No data read from file (or file empty): " << filename << std::endl;
 74 |     }
 75 |     std::cout << "Read " << data.size() << " values from " << filename << std::endl;
 76 |     return true;
 77 | }
 78 | 
 79 | 
 80 | int main() { // Testbench entry point: loads inputs and reference, streams the inputs through peakPicker, compares peak locations; returns 0 on pass and 1 on any load error or mismatch
 81 |     std::cout << "--- Starting Peak Picker Testbench ---" << std::endl;
 82 | 
 83 |     // --- Data Loading ---
 84 |     std::vector<double> xcorrVec, thresholdVec;
 85 |     std::vector<int> refLocsVec; // Use int for reference locations from file
 86 | 
 87 |     std::cout << "Loading input data..." << std::endl;
 88 |     if (!readDataFile(XCORR_INPUT_FILE, xcorrVec)) return 1;
 89 |     if (!readDataFile(THRESHOLD_INPUT_FILE, thresholdVec)) return 1;
 90 | 
 91 |     std::cout << "Loading reference output data..." << std::endl;
 92 |     // Reference file is assumed to hold 1-based indices (MATLAB convention); the comparison loop below subtracts 1
 93 |     if (!readIntDataFile(REF_OUTPUT_FILE, refLocsVec)) return 1;
 94 | 
 95 |     // Basic input validation: both inputs must have the same, non-zero length
 96 |     if (xcorrVec.size() != thresholdVec.size()) {
 97 |         std::cerr << "Error: Input xcorr size (" << xcorrVec.size()
 98 |                   << ") does not match threshold size (" << thresholdVec.size() << ")" << std::endl;
 99 |         return 1;
100 |     }
101 |     if (xcorrVec.empty()) {
102 |          std::cerr << "Error: Input data vectors are empty (or failed to load)." << std::endl;
103 |          return 1;
104 |     }
105 | 
106 |     int numSamples = xcorrVec.size(); // implicit size_t -> int narrowing
107 |     std::cout << "Number of samples to process: " << numSamples << std::endl;
108 | 
109 |     // --- Stream Preparation ---
110 |     hls::stream<Data_t> xcorrStream("xcorrStream");
111 |     hls::stream<Data_t> thresholdStream("thresholdStream");
112 |     hls::stream<Index_t> locationStream("locationStream");
113 | 
114 |     std::cout << "Populating input streams..." << std::endl;
115 |     for (int i = 0; i < numSamples; ++i) {
116 |         // Convert each double to the DUT sample type Data_t (declared in peakPicker.hpp, not in this file)
117 |         // NOTE(review): no range check is done here; out-of-range inputs follow whatever
118 |         // overflow/quantization behaviour Data_t is declared with (saturate vs. wrap) - confirm in peakPicker.hpp.
119 |         xcorrStream.write(static_cast<Data_t>(xcorrVec[i]));
120 |         thresholdStream.write(static_cast<Data_t>(thresholdVec[i]));
121 |     }
122 |     std::cout << "Input streams populated." << std::endl;
123 | 
124 |     // --- Call the DUT (Device Under Test) ---
125 |     std::cout << "Calling HLS peakPicker function..." << std::endl;
126 |     peakPicker(xcorrStream, thresholdStream, locationStream, numSamples);
127 |     std::cout << "HLS peakPicker function finished." << std::endl;
128 | 
129 |     // --- Collect Results ---
130 |     std::vector<Index_t> actualLocsVec; // Results from the DUT (treated as 0-based in the comparison below)
131 |     std::cout << "Collecting results from output stream..." << std::endl;
132 |     while (!locationStream.empty()) {
133 |         actualLocsVec.push_back(locationStream.read());
134 |     }
135 |      std::cout << "Collected " << actualLocsVec.size() << " peak locations." << std::endl;
136 | 
137 |     // --- Verification ---
138 |     std::cout << "Comparing HLS results with reference..." << std::endl;
139 |     bool match = true;
140 |     int errorCount = 0;
141 | 
142 |     // Compare number of peaks found
143 |     if (actualLocsVec.size() != refLocsVec.size()) {
144 |         std::cerr << "Error: Mismatch in number of detected peaks!" << std::endl;
145 |         std::cerr << "  Expected: " << refLocsVec.size() << std::endl;
146 |         std::cerr << "  Actual:   " << actualLocsVec.size() << std::endl;
147 |         match = false;
148 |         // Don't stop here: the common prefix of both lists is still compared element by element below.
149 |         // errorCount only counts element mismatches in that prefix; the size mismatch itself is tracked through `match`.
150 |     } else {
151 |         std::cout << "Number of peaks matches reference (" << refLocsVec.size() << ")." << std::endl;
152 |     }
153 | 
154 |     // Compare actual peak locations element by element
155 |     size_t comparisonLimit = std::min(actualLocsVec.size(), refLocsVec.size()); // NOTE(review): std::min lives in <algorithm>, which this file does not include directly; presumably it arrives transitively - confirm
156 |     for (size_t i = 0; i < comparisonLimit; ++i) {
157 |         // Convert DUT output (Index_t; presumably ap_uint<INDEX_W> - confirm in peakPicker.hpp) to int for comparison
158 |         int actualLoc = static_cast<int>(actualLocsVec[i]); // DUT output, treated as 0-based
159 |         int refLoc = refLocsVec[i];                         // Reference file value (assumed 1-based)
160 | 
161 |         // --- MODIFIED COMPARISON ---
162 |         // Adjust the 1-based reference index to 0-based for comparison
163 |         int expectedLoc_0based = refLoc - 1;
164 | 
165 |         if (actualLoc != expectedLoc_0based) {
166 |             if (errorCount < 20) { // Print details for the first 20 mismatches only
167 |                std::cerr << "Mismatch at output index " << i << ":" << std::endl;
168 |                std::cerr << "  Expected (0-based): " << expectedLoc_0based << " (from ref file value " << refLoc << ")" << std::endl;
169 |                std::cerr << "  Actual (0-based):   " << actualLoc << std::endl;
170 |             } else if (errorCount == 20) {
171 |                std::cerr << "Further mismatches suppressed..." << std::endl;
172 |             }
173 |             match = false;
174 |             errorCount++;
175 |         }
176 |         // --- END MODIFIED COMPARISON ---
177 |     }
178 | 
179 |      // Size mismatch again: `match` was already cleared by the size check above, so this block only adds a message
180 |      if (actualLocsVec.size() != refLocsVec.size()) {
181 |          match = false; // Redundant with the earlier size check; kept so the test fails if sizes differ
182 |          if (errorCount == 0) { // Only print this if no element mismatches were counted
183 |              std::cerr << "Mismatch due to differing number of peaks." << std::endl;
184 |          }
185 |      }
186 | 
187 |      if (errorCount > 0) {
188 |          std::cerr << "Total mismatches found: " << errorCount << std::endl;
189 |      }
190 | 
191 | 
192 |     // --- Report Results ---
193 |     if (match) {
194 |         std::cout << "----------------------------------------" << std::endl;
195 |         std::cout << "--- Test PASSED ---" << std::endl;
196 |         std::cout << "HLS implementation output matches the reference output (assuming 1-based reference indices)." << std::endl;
197 |         std::cout << "----------------------------------------" << std::endl;
198 |         return 0; // Success
199 |     } else {
200 |         std::cout << "----------------------------------------" << std::endl;
201 |         std::cout << "--- Test FAILED ---" << std::endl;
202 |         std::cout << "HLS implementation output does NOT match the reference output." << std::endl;
203 |         std::cout << "----------------------------------------" << std::endl;
204 |         return 1; // Failure
205 |     }
206 | }


--------------------------------------------------------------------------------
/example/vitis_hls.log:
--------------------------------------------------------------------------------
  1 | 
  2 | ****** Vitis HLS - High-Level Synthesis from C, C++ and OpenCL v2024.2.2 (64-bit)
  3 |   **** SW Build 6049644 on Mar  5 2025
  4 |   **** IP Build 6050500 on Thu Mar  6 23:33:39 MST 2025
  5 |   **** SharedData Build 6060542 on Thu Mar 06 10:31:07 MST 2025
  6 |   **** Start of session at: Fri Aug  1 14:42:38 2025
  7 |     ** Copyright 1986-2022 Xilinx, Inc. All Rights Reserved.
  8 |     ** Copyright 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved.
  9 | 
 10 | source /opt/Xilinx/Vitis/2024.2/scripts/vitis_hls/hls.tcl -notrace
 11 | INFO: [HLS 200-10] For user 'amd' on host 'amd' (Linux_x86_64 version 6.8.0-65-generic) on Fri Aug 01 14:42:39 AEST 2025
 12 | INFO: [HLS 200-10] On os Ubuntu 22.04.5 LTS
 13 | INFO: [HLS 200-10] In directory '/home/amd/UTS/llm-fpga-design/example'
 14 | WARNING: [HLS 200-2053] The vitis_hls executable is deprecated. Consider using vitis-run --mode hls --tcl
 15 | Sourcing Tcl script 'csynth.tcl'
 16 | INFO: [HLS 200-1510] Running: open_project proj_peakPicker 
 17 | INFO: [HLS 200-10] Creating and opening project '/home/amd/UTS/llm-fpga-design/example/proj_peakPicker'.
 18 | INFO: [HLS 200-1510] Running: set_top peakPicker 
 19 | INFO: [HLS 200-1510] Running: add_files peakPicker.cpp 
 20 | INFO: [HLS 200-10] Adding design file 'peakPicker.cpp' to the project
 21 | INFO: [HLS 200-1510] Running: add_files -tb peakPicker_tb.cpp 
 22 | INFO: [HLS 200-10] Adding test bench file 'peakPicker_tb.cpp' to the project
 23 | INFO: [HLS 200-1510] Running: add_files -tb ./locations_3_ref.txt 
 24 | INFO: [HLS 200-10] Adding test bench file './locations_3_ref.txt' to the project
 25 | INFO: [HLS 200-1510] Running: add_files -tb ./peakLocs_out.txt 
 26 | INFO: [HLS 200-10] Adding test bench file './peakLocs_out.txt' to the project
 27 | INFO: [HLS 200-1510] Running: add_files -tb ./pssCorrMagSq_3_in.txt 
 28 | INFO: [HLS 200-10] Adding test bench file './pssCorrMagSq_3_in.txt' to the project
 29 | INFO: [HLS 200-1510] Running: add_files -tb ./threshold_in.txt 
 30 | INFO: [HLS 200-10] Adding test bench file './threshold_in.txt' to the project
 31 | INFO: [HLS 200-1510] Running: open_solution solution1 
 32 | INFO: [HLS 200-10] Creating and opening solution '/home/amd/UTS/llm-fpga-design/example/proj_peakPicker/solution1'.
 33 | INFO: [HLS 200-1505] Using default flow_target 'vivado'
 34 | Resolution: For help on HLS 200-1505 see docs.xilinx.com/access/sources/dita/topic?Doc_Version=2024.2%20English&url=ug1448-hls-guidance&resourceid=200-1505.html
 35 | INFO: [HLS 200-1510] Running: set_part xc7k410t-ffg900-2 
 36 | INFO: [HLS 200-1611] Setting target device to 'xc7k410t-ffg900-2'
 37 | INFO: [HLS 200-1510] Running: create_clock -period 3.90 -name default 
 38 | INFO: [SYN 201-201] Setting up clock 'default' with a period of 3.9ns.
 39 | INFO: [HLS 200-1510] Running: set_clock_uncertainty 12.5% 
 40 | INFO: [SYN 201-201] Setting up clock 'default' with an uncertainty of 0.487ns.
 41 | INFO: [HLS 200-1510] Running: csynth_design 
 42 | INFO: [HLS 200-111] Finished File checks and directory preparation: CPU user time: 0.04 seconds. CPU system time: 0.01 seconds. Elapsed time: 0.04 seconds; current allocated memory: 640.586 MB.
 43 | INFO: [HLS 200-10] Analyzing design file 'peakPicker.cpp' ... 
 44 | INFO: [HLS 200-111] Finished Source Code Analysis and Preprocessing: CPU user time: 1.67 seconds. CPU system time: 0.5 seconds. Elapsed time: 2.19 seconds; current allocated memory: 642.633 MB.
 45 | INFO: [HLS 200-777] Using interface defaults for 'Vivado' flow target.
 46 | INFO: [HLS 200-1995] There were 912 instructions in the design after the 'Compile/Link' phase of compilation. See the Design Size Report for more details: /home/amd/UTS/llm-fpga-design/example/proj_peakPicker/solution1/syn/report/csynth_design_size.rpt
 47 | INFO: [HLS 200-1995] There were 298 instructions in the design after the 'Unroll/Inline (step 1)' phase of compilation. See the Design Size Report for more details: /home/amd/UTS/llm-fpga-design/example/proj_peakPicker/solution1/syn/report/csynth_design_size.rpt
 48 | INFO: [HLS 200-1995] There were 148 instructions in the design after the 'Unroll/Inline (step 2)' phase of compilation. See the Design Size Report for more details: /home/amd/UTS/llm-fpga-design/example/proj_peakPicker/solution1/syn/report/csynth_design_size.rpt
 49 | INFO: [HLS 200-1995] There were 146 instructions in the design after the 'Unroll/Inline (step 3)' phase of compilation. See the Design Size Report for more details: /home/amd/UTS/llm-fpga-design/example/proj_peakPicker/solution1/syn/report/csynth_design_size.rpt
 50 | INFO: [HLS 200-1995] There were 146 instructions in the design after the 'Unroll/Inline (step 4)' phase of compilation. See the Design Size Report for more details: /home/amd/UTS/llm-fpga-design/example/proj_peakPicker/solution1/syn/report/csynth_design_size.rpt
 51 | INFO: [HLS 200-1995] There were 68 instructions in the design after the 'Array/Struct (step 1)' phase of compilation. See the Design Size Report for more details: /home/amd/UTS/llm-fpga-design/example/proj_peakPicker/solution1/syn/report/csynth_design_size.rpt
 52 | INFO: [HLS 200-1995] There were 68 instructions in the design after the 'Array/Struct (step 2)' phase of compilation. See the Design Size Report for more details: /home/amd/UTS/llm-fpga-design/example/proj_peakPicker/solution1/syn/report/csynth_design_size.rpt
 53 | INFO: [HLS 200-1995] There were 68 instructions in the design after the 'Array/Struct (step 3)' phase of compilation. See the Design Size Report for more details: /home/amd/UTS/llm-fpga-design/example/proj_peakPicker/solution1/syn/report/csynth_design_size.rpt
 54 | INFO: [HLS 200-1995] There were 68 instructions in the design after the 'Array/Struct (step 4)' phase of compilation. See the Design Size Report for more details: /home/amd/UTS/llm-fpga-design/example/proj_peakPicker/solution1/syn/report/csynth_design_size.rpt
 55 | INFO: [HLS 200-1995] There were 70 instructions in the design after the 'Array/Struct (step 5)' phase of compilation. See the Design Size Report for more details: /home/amd/UTS/llm-fpga-design/example/proj_peakPicker/solution1/syn/report/csynth_design_size.rpt
 56 | INFO: [HLS 200-1995] There were 70 instructions in the design after the 'Performance (step 1)' phase of compilation. See the Design Size Report for more details: /home/amd/UTS/llm-fpga-design/example/proj_peakPicker/solution1/syn/report/csynth_design_size.rpt
 57 | INFO: [HLS 200-1995] There were 69 instructions in the design after the 'Performance (step 2)' phase of compilation. See the Design Size Report for more details: /home/amd/UTS/llm-fpga-design/example/proj_peakPicker/solution1/syn/report/csynth_design_size.rpt
 58 | INFO: [HLS 200-1995] There were 68 instructions in the design after the 'Performance (step 3)' phase of compilation. See the Design Size Report for more details: /home/amd/UTS/llm-fpga-design/example/proj_peakPicker/solution1/syn/report/csynth_design_size.rpt
 59 | INFO: [HLS 200-1995] There were 68 instructions in the design after the 'Performance (step 4)' phase of compilation. See the Design Size Report for more details: /home/amd/UTS/llm-fpga-design/example/proj_peakPicker/solution1/syn/report/csynth_design_size.rpt
 60 | INFO: [HLS 200-1995] There were 77 instructions in the design after the 'HW Transforms (step 1)' phase of compilation. See the Design Size Report for more details: /home/amd/UTS/llm-fpga-design/example/proj_peakPicker/solution1/syn/report/csynth_design_size.rpt
 61 | INFO: [HLS 200-1995] There were 79 instructions in the design after the 'HW Transforms (step 2)' phase of compilation. See the Design Size Report for more details: /home/amd/UTS/llm-fpga-design/example/proj_peakPicker/solution1/syn/report/csynth_design_size.rpt
 62 | INFO: [HLS 214-186] Unrolling loop 'init_sr' (peakPicker.cpp:36:11) in function 'peakPicker_wrapper' completely with a factor of 11 (peakPicker.cpp:14:0)
 63 | INFO: [HLS 200-111] Finished Compiling Optimization and Transform: CPU user time: 1.52 seconds. CPU system time: 0.44 seconds. Elapsed time: 6.66 seconds; current allocated memory: 652.320 MB.
 64 | INFO: [HLS 200-111] Finished Checking Pragmas: CPU user time: 0 seconds. CPU system time: 0 seconds. Elapsed time: 0 seconds; current allocated memory: 652.320 MB.
 65 | INFO: [HLS 200-10] Starting code transformations ...
 66 | INFO: [HLS 200-111] Finished Standard Transforms: CPU user time: 0 seconds. CPU system time: 0 seconds. Elapsed time: 0.01 seconds; current allocated memory: 652.391 MB.
 67 | INFO: [HLS 200-10] Checking synthesizability ...
 68 | INFO: [HLS 200-111] Finished Checking Synthesizability: CPU user time: 0.01 seconds. CPU system time: 0.01 seconds. Elapsed time: 0 seconds; current allocated memory: 652.406 MB.
 69 | INFO: [XFORM 203-11] Balancing expressions in function 'peakPicker_wrapper' (peakPicker.cpp:47:13)...11 expression(s) balanced.
 70 | INFO: [HLS 200-111] Finished Loop, function and other optimizations: CPU user time: 0.01 seconds. CPU system time: 0 seconds. Elapsed time: 0.03 seconds; current allocated memory: 673.844 MB.
 71 | INFO: [HLS 200-111] Finished Architecture Synthesis: CPU user time: 0.02 seconds. CPU system time: 0 seconds. Elapsed time: 0.01 seconds; current allocated memory: 674.102 MB.
 72 | INFO: [HLS 200-10] Starting hardware synthesis ...
 73 | INFO: [HLS 200-10] Synthesizing 'peakPicker' ...
 74 | INFO: [HLS 200-10] ----------------------------------------------------------------
 75 | INFO: [HLS 200-42] -- Implementing module 'peakPicker_wrapper' 
 76 | INFO: [HLS 200-10] ----------------------------------------------------------------
 77 | INFO: [SCHED 204-11] Starting scheduling ...
 78 | INFO: [SCHED 204-61] Pipelining loop 'ultra_main_loop'.
 79 | INFO: [HLS 200-1470] Pipelining result : Target II = 1, Final II = 1, Depth = 4, loop 'ultra_main_loop'
 80 | INFO: [SCHED 204-11] Finished scheduling.
 81 | INFO: [HLS 200-111] Finished Scheduling: CPU user time: 0.04 seconds. CPU system time: 0.02 seconds. Elapsed time: 0.06 seconds; current allocated memory: 675.617 MB.
 82 | INFO: [BIND 205-100] Starting micro-architecture generation ...
 83 | INFO: [BIND 205-101] Performing variable lifetime analysis.
 84 | INFO: [BIND 205-101] Exploring resource sharing.
 85 | INFO: [BIND 205-101] Binding ...
 86 | INFO: [BIND 205-100] Finished micro-architecture generation.
 87 | INFO: [HLS 200-111] Finished Binding: CPU user time: 0.01 seconds. CPU system time: 0 seconds. Elapsed time: 0.02 seconds; current allocated memory: 675.617 MB.
 88 | INFO: [HLS 200-10] ----------------------------------------------------------------
 89 | INFO: [HLS 200-42] -- Implementing module 'peakPicker' 
 90 | INFO: [HLS 200-10] ----------------------------------------------------------------
 91 | INFO: [SCHED 204-11] Starting scheduling ...
 92 | INFO: [SCHED 204-11] Finished scheduling.
 93 | INFO: [HLS 200-111] Finished Scheduling: CPU user time: 0.02 seconds. CPU system time: 0.01 seconds. Elapsed time: 0.02 seconds; current allocated memory: 675.617 MB.
 94 | INFO: [BIND 205-100] Starting micro-architecture generation ...
 95 | INFO: [BIND 205-101] Performing variable lifetime analysis.
 96 | INFO: [BIND 205-101] Exploring resource sharing.
 97 | INFO: [BIND 205-101] Binding ...
 98 | INFO: [BIND 205-100] Finished micro-architecture generation.
 99 | INFO: [HLS 200-111] Finished Binding: CPU user time: 0.01 seconds. CPU system time: 0 seconds. Elapsed time: 0.01 seconds; current allocated memory: 675.617 MB.
100 | INFO: [HLS 200-10] ----------------------------------------------------------------
101 | INFO: [HLS 200-10] -- Generating RTL for module 'peakPicker_wrapper' 
102 | INFO: [HLS 200-10] ----------------------------------------------------------------
103 | INFO: [HLS 200-1030] Apply Unified Pipeline Control on module 'peakPicker_wrapper' pipeline 'ultra_main_loop' pipeline type 'loop pipeline'
104 | INFO: [RTGEN 206-100] Finished creating RTL model for 'peakPicker_wrapper'.
105 | INFO: [HLS 200-111] Finished Creating RTL model: CPU user time: 0.02 seconds. CPU system time: 0 seconds. Elapsed time: 0.02 seconds; current allocated memory: 675.617 MB.
106 | INFO: [HLS 200-10] ----------------------------------------------------------------
107 | INFO: [HLS 200-10] -- Generating RTL for module 'peakPicker' 
108 | INFO: [HLS 200-10] ----------------------------------------------------------------
109 | INFO: [RTGEN 206-500] Setting interface mode on port 'peakPicker/xcorr' to 'ap_memory'.
110 | INFO: [RTGEN 206-500] Setting interface mode on port 'peakPicker/threshold' to 'ap_memory'.
111 | INFO: [RTGEN 206-500] Setting interface mode on port 'peakPicker/input_length' to 'ap_none'.
112 | INFO: [RTGEN 206-500] Setting interface mode on port 'peakPicker/locations' to 'ap_memory'.
113 | INFO: [RTGEN 206-500] Setting interface mode on port 'peakPicker/num_peaks' to 'ap_vld'.
114 | INFO: [RTGEN 206-500] Setting interface mode on function 'peakPicker' to 'ap_ctrl_hs'.
115 | INFO: [RTGEN 206-100] Finished creating RTL model for 'peakPicker'.
116 | INFO: [HLS 200-111] Finished Creating RTL model: CPU user time: 0.06 seconds. CPU system time: 0 seconds. Elapsed time: 0.07 seconds; current allocated memory: 676.344 MB.
117 | INFO: [HLS 200-111] Finished Generating all RTL models: CPU user time: 0.09 seconds. CPU system time: 0.01 seconds. Elapsed time: 0.1 seconds; current allocated memory: 679.273 MB.
118 | INFO: [HLS 200-111] Finished Updating report files: CPU user time: 0.14 seconds. CPU system time: 0.01 seconds. Elapsed time: 0.14 seconds; current allocated memory: 681.910 MB.
119 | INFO: [VHDL 208-304] Generating VHDL RTL for peakPicker.
120 | INFO: [VLOG 209-307] Generating Verilog RTL for peakPicker.
121 | INFO: [HLS 200-790] **** Loop Constraint Status: All loop constraints were satisfied.
122 | INFO: [HLS 200-789] **** Estimated Fmax: 310.46 MHz
123 | INFO: [HLS 200-2161] Finished Command csynth_design Elapsed time: 00:00:09; Allocated memory: 41.367 MB.
124 | INFO: [HLS 200-112] Total CPU user time: 6.22 seconds. Total CPU system time: 1.48 seconds. Total elapsed time: 12.27 seconds; peak allocated memory: 681.953 MB.
125 | 


--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
  1 | # LLM-Aided FPGA Design Flow
  2 | 
  3 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
  4 | [![HLS Version](https://img.shields.io/badge/HLS-2023.2-blue.svg)](https://www.xilinx.com/products/design-tools/vitis/vitis-hls.html)
  5 | 
  6 | ## Overview
  7 | 
  8 | This repository demonstrates a modern approach to FPGA design using Large Language Models (LLMs) to automate and enhance the design workflow from MATLAB algorithms to optimized hardware implementations. By leveraging LLMs like Gemini (the workflow's default service), Claude 3.7 Sonnet, GPT-4, or GitHub Copilot, we significantly reduce development time while maintaining design quality.
  9 | 
 10 | The repository showcases:
 11 | 
 12 | 1. Conversion of MATLAB reference algorithms to HLS C++ 
 13 | 2. Automated debugging of C simulation errors
 14 | 3. Prompt engineering techniques for hardware design tasks
 15 | 4. Performance optimization through LLM-guided directives
 16 | 
 17 | ## Case Study: 5G NR Peak Picker
 18 | 
 19 | Our primary example is a peak picker algorithm for 5G NR Synchronization Signal Block (SSB) detection, which demonstrates the complete LLM-assisted workflow from MATLAB specification to optimized HLS implementation.
 20 | 
 21 | ### Algorithm Description
 22 | 
 23 | The peak picker algorithm:
 24 | - Takes PSS (Primary Synchronization Signal) correlation magnitude squared values as input
 25 | - Compares values against thresholds to identify candidate peaks
 26 | - Applies filtering to identify true peaks
 27 | - Returns the locations (indices) of detected peaks
 28 | 
 29 | ## LLM-Based HLS Code Generation and Debugging Workflow
 30 | 
 31 | Our comprehensive workflow automates the entire process from MATLAB algorithm to optimized HLS implementation:
 32 | 
 33 | ```mermaid
 34 | graph TB
 35 |     subgraph Inputs
 36 |         A[MATLAB Prototype Files] -->|Input| B(Generate HLS Code)
 37 |         P[Prompt Template] -->|Format| B
 38 |     end
 39 | 
 40 |     subgraph AI_Code_Generation [AI Code Generation Process]
 41 |         B -->|Creates Prompt| C{Select LLM Service}
 42 |         C -->|Default| D[Gemini API]
 43 |         C -->|Fallback| E[OpenAI API]
 44 |         C -->|Fallback| F[Claude API]
 45 |         
 46 |         D & E & F -->|Generate| G[LLM Response]
 47 |         G -->|Parse| H[Extract Code]
 48 |         H -->|Save| I[Generated HLS Files]
 49 |     end
 50 | 
 51 |     subgraph Outputs
 52 |         I -->|Header| J[component.hpp]
 53 |         I -->|Implementation| K[component.cpp]
 54 |         I -->|Testbench| L[component_tb.cpp]
 55 |     end
 56 | 
 57 |     subgraph Verification
 58 |         J & K & L -->|Compile & Run| M[C Simulation]
 59 |         M -->|Pass| N[HLS Synthesis]
 60 |         M -->|Fail| O[Error Logs]
 61 |     end
 62 | 
 63 |     subgraph AI_Debug_Assistant [AI Debug Assistant]
 64 |         O -->|Input| Q(Debug Assistant)
 65 |         J & K & L -->|Source Code| Q
 66 |         Q -->|Creates Debug Prompt| R{Select LLM Service}
 67 |         R -->|Default| S[Gemini API]
 68 |         R -->|Fallback| T[OpenAI API]
 69 |         R -->|Fallback| U[Claude API]
 70 |         
 71 |         S & T & U -->|Analyze| V[LLM Debug Analysis]
 72 |         V -->|Generate| W[Debug Report]
 73 |         V -->|Extract| X[Code Fixes]
 74 |         X -->|Optional| Y[Apply Fixes]
 75 |         Y -->|Update| J & K & L
 76 |     end
 77 | 
 78 |     style D fill:#34A853,stroke:#34A853,color:white
 79 |     style S fill:#34A853,stroke:#34A853,color:white
 80 |     style G fill:#F9AB00,stroke:#F9AB00,color:white
 81 |     style V fill:#F9AB00,stroke:#F9AB00,color:white
 82 |     style I fill:#4285F4,stroke:#4285F4,color:white
 83 |     style W fill:#4285F4,stroke:#4285F4,color:white
 84 | ```
 85 | 
 86 | ### Workflow Stages
 87 | 
 88 | #### 1. Input Stage
 89 | - **MATLAB Prototype Files**: Reference algorithm implementation in MATLAB
 90 | - **Prompt Template**: Structured instructions for the LLM to follow when generating HLS code
 91 | 
 92 | #### 2. AI Code Generation Process
 93 | - **Creates Prompt**: Combines MATLAB code with template for comprehensive context
 94 | - **Select LLM Service**: Chooses between Gemini (default), OpenAI, or Claude APIs
 95 | - **LLM Response**: Raw text response containing code and explanations
 96 | - **Extract Code**: Parses response to identify different file types and code sections
 97 | - **Generated HLS Files**: Creates properly structured C++ files ready for simulation
 98 | 
 99 | #### 3. Output Stage
100 | - **Header File**: Contains class definitions, function declarations, and constants
101 | - **Implementation File**: Contains the core HLS algorithm implementation with pragmas
102 | - **Testbench File**: Includes data loading, function calls, and verification logic
103 | 
104 | #### 4. Verification Stage
105 | - **C Simulation**: Compile and test the generated code for functional correctness
106 | - **HLS Synthesis**: If simulation passes, proceed to hardware synthesis
107 | - **Error Logs**: If simulation fails, collect error information for debugging
108 | 
109 | #### 5. AI Debug Assistant Stage
110 | - **Debug Assistant**: Takes error logs and source files as input
111 | - **Creates Debug Prompt**: Structures the debugging context for LLM analysis
112 | - **LLM Analysis**: AI analyzes errors and suggests specific code fixes
113 | - **Debug Report**: Comprehensive explanation of issues and solutions
114 | - **Code Fixes**: Specific code changes that can be automatically applied
115 | - **Apply Fixes**: Update source files with AI-suggested corrections
116 | 
117 | ### Prompt Engineering for Code Generation
118 | 
119 | We've developed specialized prompt templates for effective code generation:
120 | 
121 | 1. **Context Section**: Explains the algorithm purpose and background
122 | 2. **Task Description**: Clearly defines what the LLM needs to implement
123 | 3. **Implementation Requirements**: Specifies coding standards, interfaces, and optimizations
124 | 4. **Deliverables**: Clearly states what files should be produced
125 | 
126 | Example from our peak picker implementation:
127 | 
128 | ```markdown
129 | # Copilot Instructions for Peak Picker Implementation
130 | 
131 | ## Project Context
132 | This project implements a critical component of a 5G NR SSB detection application. 
133 | The peak picker algorithm identifies SSB signals by locating peaks where the 
134 | magnitude squared of the PSS correlation (`xcorr`) exceeds a predefined threshold.
135 | 
136 | ## Task Description
137 | Your task is to translate the MATLAB peak picker algorithm into efficient HLS C++ 
138 | code while preserving exact functionality. The implementation should be optimized 
139 | for FPGA deployment using Xilinx HLS directives.
140 | 
141 | [Additional sections...]
142 | ```
143 | 
144 | ## How the Debug Assistant Works
145 | 
146 | The debug assistant provides automated, AI-powered analysis and correction of HLS simulation errors:
147 | 
148 | ```mermaid
149 | graph TD
150 |     subgraph Inputs
151 |         A[Error Log] -->|read_file| C
152 |         B[HLS C++ Source Files] -->|read_file| D
153 |     end
154 | 
155 |     subgraph Processing
156 |         C[Extract Error Information] --> E
157 |         D[Parse Source Code] --> E
158 |         E[Create Debug Prompt] --> F
159 |     end
160 | 
161 |     subgraph LLM_Analysis
162 |         F[Query LLM API] -->|model selection| G{Select Model}
163 |         G -->|gemini-2.0-pro-exp| H[Gemini API]
164 |         G -->|gpt-4/gpt-3.5-turbo| I[OpenAI API]
165 |         G -->|claude-sonnet| J[Claude API]
166 |         H --> K[LLM Analysis Response]
167 |         I --> K
168 |         J --> K
169 |     end
170 | 
171 |     subgraph Outputs
172 |         K --> L[Generate Debug Report]
173 |         K --> M[Parse Code Corrections]
174 |         
175 |         L --> N[Save Markdown Report]
176 |         M --> O[Apply Code Fixes]
177 |         O -->|user confirmation| P[Edit Source Files]
178 |     end
179 | 
180 |     style H fill:#34A853,stroke:#34A853,color:white
181 |     style K fill:#F9AB00,stroke:#F9AB00,color:white
182 |     style P fill:#4285F4,stroke:#4285F4,color:white
183 |     style N fill:#4285F4,stroke:#4285F4,color:white
184 | ```
185 | 
186 | ### Debug Workflow Stages
187 | 
188 | #### 1. Inputs Processing
189 | - **Error Log Analysis**: Extracts meaningful error patterns from C simulation logs
190 | - **Source Code Parsing**: Gathers relevant source files to provide complete context
191 | 
192 | #### 2. Processing
193 | - **Extract Error Information**: Identifies specific error messages and patterns
194 | - **Parse Source Code**: Organizes code context for the LLM
195 | - **Create Debug Prompt**: Structures the debugging request with all relevant information
196 | 
197 | #### 3. LLM Analysis
198 | - **Query LLM API**: Sends the prompt to the selected AI service
199 | - **Model Selection**: Chooses between Gemini (primary), GPT, or Claude models
200 | - **LLM Response**: AI analyzes the issues and provides detailed debugging guidance
201 | 
202 | #### 4. Outputs
203 | - **Generate Debug Report**: Creates detailed markdown reports explaining errors and fixes
204 | - **Parse Code Corrections**: Extracts specific code changes from the LLM response
205 | - **Apply Code Fixes**: Optionally implements the suggested changes with user confirmation
206 | - **Edit Source Files**: Updates the original files with proper change tracking
207 | 
208 | The debug assistant handles common HLS errors including:
209 | - Interface mismatches between implementation and testbench
210 | - Data type inconsistencies
211 | - Indexing errors
 212 | - Algorithmic logic errors
213 | - Misunderstandings of HLS-specific behaviors
214 | 
215 | ## LLM Selection and Integration
216 | 
217 | Our tools support multiple LLM providers with different capabilities:
218 | 
219 | - **Gemini Pro/Flash**: Offers strong reasoning about code structures and efficient debugging
220 | - **GPT-3.5/4**: Provides detailed code generation with comprehensive comments
221 | - **Claude Sonnet**: Excels at understanding complex algorithms and providing thorough explanations
222 | 
223 | The framework automatically selects appropriate models based on task complexity, or allows specifying a model for specific use cases.
224 | 
225 | ## Automated File Generation and Management
226 | 
227 | The `generate_hls_code.py` tool implements sophisticated code extraction algorithms to:
228 | 
229 | - Parse LLM responses for code blocks
230 | - Identify appropriate file types (header, implementation, testbench)
231 | - Generate properly formatted HLS C++ files
232 | - Maintain correct dependencies between files
233 | - Create project structures compatible with Vitis HLS
234 | 
235 | ## Getting Started
236 | 
237 | ### Prerequisites
238 | 
239 | - Vitis HLS 2023.2 or newer
240 | - MATLAB R2023a or newer (for reference models)
241 | - Python 3.8+ with necessary libraries for data handling
242 | - API keys for supported LLM services (at least one of the following):
243 |   - Google Gemini API key (recommended)
244 |   - OpenAI API key
245 |   - Anthropic Claude API key
246 | 
247 | ### Installation
248 | 
249 | ```bash
250 | # Clone this repository
251 | git clone https://github.com/rockyco/llm-fpga-design.git
252 | cd llm-fpga-design
253 | 
254 | # Set up your environment
255 | source /path/to/Vitis/settings64.sh
256 | 
257 | # Install required Python packages
258 | pip install -r requirements.txt
259 | 
 260 | # Add your API keys to ~/.bashrc (they must be exported so the Python scripts can read them), or put KEY=value lines in a .env file
 261 | echo "export GEMINI_API_KEY=your_gemini_api_key" >> ~/.bashrc
 262 | echo "export OPENAI_API_KEY=your_openai_api_key" >> ~/.bashrc
 263 | echo "export CLAUDE_API_KEY=your_claude_api_key" >> ~/.bashrc
264 | source ~/.bashrc
265 | ```
266 | 
267 | ### Usage
268 | 
269 | 1. **Generate HLS C++ from MATLAB reference**:
270 |    Supported models: `gemini-2.0-flash-thinking-exp`, `gemini-2.0-pro-exp`, `gpt-4`, `gpt-3.5-turbo`, `claude-sonnet`
271 |    ```bash
272 |    python3 scripts/generate_hls_code.py --matlab_file algorithms/peakPicker.m algorithms/peakPicker_tb.m --prompt prompts/hls_conversion.md --model gemini-2.0-flash-thinking-exp
273 |    ```
274 | 
275 | 2. **Run C simulation**:
276 |    ```bash
277 |    cd implementations/peakPicker
278 |    make csim
279 |    ```
280 | 
281 | 3. **Debug errors with LLM assistance**:
282 |    ```bash
283 |    cd ../../
284 |    python3 scripts/debug_assistant.py --error_log implementations/peakPicker/proj_peakPicker/solution1/csim/report/peakPicker_csim.log --source_file implementations/peakPicker/peakPicker.cpp implementations/peakPicker/peakPicker.hpp implementations/peakPicker/peakPicker_tb.cpp
285 |    ```
286 | 
287 | 4. **Synthesize and export RTL**:
288 |    ```bash
289 |    make csynth
290 |    make export_ip
291 |    ```
292 | 
293 | ## Code Generation Process
294 | 
295 | The `generate_hls_code.py` script implements a comprehensive code generation pipeline:
296 | 
 297 | 1. **Code Analysis**: Examines the MATLAB reference to understand the algorithm's function
298 | 2. **Prompt Construction**: Combines specialized templates with example code
299 | 3. **Model Selection**: Uses the most appropriate LLM based on task needs
300 | 4. **Response Processing**: Implements robust parsing to extract code blocks
301 | 5. **Code Organization**: Creates properly structured HLS project files
302 | 6. **Documentation**: Automatically preserves explanations from the LLM
303 | 
304 | Key features include:
305 | - Support for multi-file MATLAB input
306 | - Robust code block extraction with multiple fallback strategies
307 | - File type identification based on content patterns
308 | - Project structure generation following HLS best practices
309 | 
310 | ## Repository Structure
311 | 
312 | ```
313 | llm-fpga-design/
314 | ├── algorithms/                  # MATLAB reference implementations
315 | ├── implementations/             # Generated HLS C++ implementations
316 | │   └── peakPicker/              # Peak Picker implementation case study
317 | ├── prompts/                     # LLM prompt templates
318 | ├── scripts/                     # Automation scripts
319 | │   ├── generate_hls_code.py     # Code generation script
320 | │   └── debug_assistant.py       # Debugging assistant script
321 | ├── data/                        # Test data files
322 | └── docs/                        # Documentation
323 | ```
324 | 
325 | ## Best Practices
326 | 
327 | Based on our experience, we recommend these best practices for LLM-assisted FPGA design:
328 | 
329 | 1. **Structured Prompts**: Use clear, detailed prompts with specific sections for context, requirements, and deliverables
330 | 2. **Iterative Refinement**: Start with high-level requirements, then refine implementation details
331 | 3. **Input/Output Examples**: Provide concrete examples of expected behavior
332 | 4. **Domain-Specific Knowledge**: Include relevant HLS and FPGA concepts in prompts
333 | 5. **Error Analysis**: When debugging, provide complete error messages and surrounding context
334 | 6. **Model Selection**: Choose appropriate models for different tasks:
335 |    - Use Gemini Flash for quick iterations and debugging
336 |    - Use GPT-4 for complex algorithms needing careful implementation
337 |    - Use Claude for detailed explanations and educational contexts
338 | 7. **Prompt Templates**: Maintain a library of effective prompt templates for reuse
339 | 8. **Human Review**: Always review and understand generated code before synthesis
340 | 
341 | ## Limitations and Considerations
342 | 
343 | - LLMs may not be aware of the latest HLS features or hardware-specific optimizations
344 | - Complex timing constraints might require manual refinement
345 | - While LLMs can generate optimized code, expert review is still recommended for critical applications
346 | - Actual hardware performance should be verified through physical implementation
347 | - LLMs may occasionally:
348 |   - Generate incorrect pragma syntax that needs manual correction
349 |   - Not fully understand resource vs. performance tradeoffs
350 |   - Struggle with very complex interface requirements
351 |   - Need help with target-specific optimizations
352 | 
353 | ## Contributing
354 | 
355 | Contributions are welcome! Please feel free to submit a Pull Request.
356 | 
357 | 1. Fork the repository
358 | 2. Create your feature branch (`git checkout -b feature/amazing-feature`)
359 | 3. Commit your changes (`git commit -m 'Add some amazing feature'`)
360 | 4. Push to the branch (`git push origin feature/amazing-feature`)
361 | 5. Open a Pull Request
362 | 
363 | ## License
364 | 
365 | This project is licensed under the MIT License - see the LICENSE file for details.
366 | 
367 | ## Acknowledgments
368 | 
369 | - Thanks to the open-source HLS and FPGA design communities
 370 | - Special thanks to the developers of the Google Gemini 2.5 Pro API, Claude 3.7 Sonnet, and GitHub Copilot for enabling this workflow
371 | 
372 | 


--------------------------------------------------------------------------------
/scripts/optimize_hls_code.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | import argparse
  4 | import os
  5 | import sys
  6 | import requests
  7 | import json
  8 | import re
  9 | import openai
 10 | import google.generativeai as genai
 11 | from pathlib import Path
 12 | from datetime import datetime
 13 | from dotenv import load_dotenv
 14 | 
 15 | # Load environment variables for API keys
 16 | load_dotenv()
 17 | 
 18 | # Get API keys from environment variables
 19 | OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
 20 | GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')
 21 | CLAUDE_API_KEY = os.getenv('CLAUDE_API_KEY')
 22 | 
 23 | def parse_arguments():
 24 |     """Parse command line arguments."""
 25 |     parser = argparse.ArgumentParser(description='Optimize HLS C++ code for better performance using LLM')
 26 |     parser.add_argument('--source_dir', required=True, 
 27 |                         help='Directory containing HLS source files to optimize')
 28 |     parser.add_argument('--prompt', required=False, 
 29 |                         help='Path to prompt template file (or prompt name)')
 30 |     parser.add_argument('--output_dir', default=None, 
 31 |                         help='Directory to save optimized HLS code (defaults to source_dir)')
 32 |     parser.add_argument('--model', default='gemini-2.5-pro-exp-03-25', 
 33 |                         help='LLM model to use')
 34 |     parser.add_argument('--primary_goal', required=False, default="Reduce latency", 
 35 |                         help='Primary optimization goal (e.g., "Reduce latency by 30%")')
 36 |     parser.add_argument('--secondary_goal', required=False, default="Maintain resource usage", 
 37 |                         help='Secondary optimization goal (e.g., "Maintain resource usage")')
 38 |     parser.add_argument('--api_key', 
 39 |                         help='API key for LLM service')
 40 |     return parser.parse_args()
 41 | 
 42 | def read_file(file_path):
 43 |     """Read and return the content of a file."""
 44 |     try:
 45 |         with open(file_path, 'r') as f:
 46 |             return f.read()
 47 |     except Exception as e:
 48 |         print(f"Error reading file {file_path}: {e}")
 49 |         sys.exit(1)
 50 | 
 51 | def find_source_files(source_dir):
 52 |     """Find all relevant HLS source files in the directory."""
 53 |     source_files = {}
 54 |     for ext in ['.cpp', '.hpp', '.h', '_tb.cpp']:
 55 |         for file in Path(source_dir).glob(f'*{ext}'):
 56 |             source_files[file.name] = str(file)
 57 |     
 58 |     # Also look for csynth.rpt or other report files
 59 |     for report_file in Path(source_dir).glob('**/csynth.rpt'):
 60 |         source_files['csynth.rpt'] = str(report_file)
 61 |     
 62 |     # Look for implementation reports
 63 |     for report_file in Path(source_dir).glob('**/verilog/report/**/*.rpt'):
 64 |         source_files[f'report_{report_file.name}'] = str(report_file)
 65 |     
 66 |     return source_files
 67 | 
 68 | def extract_performance_metrics(source_dir):
 69 |     """Extract performance metrics from synthesis and implementation reports."""
 70 |     metrics = {}
 71 |     
 72 |     # Look for csynth.rpt
 73 |     csynth_path = None
 74 |     for path in Path(source_dir).glob('**/csynth.rpt'):
 75 |         csynth_path = path
 76 |         break
 77 |     
 78 |     if csynth_path:
 79 |         try:
 80 |             csynth_content = read_file(str(csynth_path))
 81 |             
 82 |             # Extract latency information
 83 |             latency_match = re.search(r'Latency \(cycles\)\s*\|\s*min\s*\|\s*max\s*\|\s*min/max\s*\|\s*\n\s*\|\s*-+\s*\|\s*-+\s*\|\s*-+\s*\|\s*\n\s*\|\s*(\d+)\s*\|\s*(\d+)', csynth_content)
 84 |             if latency_match:
 85 |                 metrics['latency_min'] = int(latency_match.group(1))
 86 |                 metrics['latency_max'] = int(latency_match.group(2))
 87 |             
 88 |             # Extract resource utilization
 89 |             resource_pattern = r'(\|\s*([A-Za-z0-9]+)\s*\|\s*(\d+)\s*\|\s*(\d+)\s*\|\s*(\d+)\s*\|\s*(\d+%?\s*)\|)'
 90 |             resource_matches = re.findall(resource_pattern, csynth_content)
 91 |             
 92 |             if resource_matches:
 93 |                 metrics['resources'] = {}
 94 |                 for match in resource_matches:
 95 |                     resource_type = match[1].strip()
 96 |                     used = int(match[2])
 97 |                     total = int(match[3]) if match[3] != '0' else 0
 98 |                     metrics['resources'][resource_type] = {
 99 |                         'used': used,
100 |                         'total': total,
101 |                         'utilization': f"{(used/total*100):.2f}%" if total > 0 else "N/A"
102 |                     }
103 |         except Exception as e:
104 |             print(f"Error extracting metrics from csynth.rpt: {e}")
105 |     
106 |     # Format metrics as a string for the prompt
107 |     metrics_str = "## Performance Metrics\n\n"
108 |     
109 |     if 'latency_min' in metrics:
110 |         metrics_str += f"### Latency\n- Minimum: {metrics['latency_min']} cycles\n- Maximum: {metrics['latency_max']} cycles\n\n"
111 |     
112 |     if 'resources' in metrics:
113 |         metrics_str += "### Resource Utilization\n"
114 |         for resource, values in metrics['resources'].items():
115 |             metrics_str += f"- {resource}: {values['used']} used / {values['total']} total ({values['utilization']})\n"
116 |     
117 |     if metrics_str == "## Performance Metrics\n\n":
118 |         metrics_str += "No performance metrics available from synthesis reports."
119 |     
120 |     return metrics_str, metrics
121 | 
def create_optimization_prompt(source_files, performance_metrics, prompt_template, primary_goal, secondary_goal):
    """Fill the optimization prompt template with sources, metrics and goals.

    Args:
        source_files: dict mapping a file name to its path. Only names ending
            in ``.cpp``, ``.hpp`` or ``.h`` are read and embedded.
        performance_metrics: text substituted for ``{{PERFORMANCE_METRICS}}``.
        prompt_template: template text containing ``{{SOURCE_FILES}}``,
            ``{{PERFORMANCE_METRICS}}``, ``{{PRIMARY_GOAL}}`` and/or
            ``{{SECONDARY_GOAL}}`` placeholders.
        primary_goal: text substituted for ``{{PRIMARY_GOAL}}``.
        secondary_goal: text substituted for ``{{SECONDARY_GOAL}}``.

    Returns:
        The template with every known placeholder replaced. Unknown
        ``{{...}}`` markers are left untouched.
    """
    # Load source file contents
    source_contents = {}
    for name, path in source_files.items():
        if not name.endswith(('.cpp', '.hpp', '.h', '_tb.cpp')):
            continue
        try:
            source_contents[name] = read_file(path)
        except (Exception, SystemExit):
            # read_file() signals failure through sys.exit(); keep building the
            # prompt with a placeholder, but let KeyboardInterrupt propagate.
            source_contents[name] = f"Error reading {path}"

    # Create source files section
    source_files_str = "## Source Files\n\n" + "".join(
        f"### {name}\n```cpp\n{content}\n```\n\n"
        for name, content in source_contents.items()
    )

    replacements = {
        "SOURCE_FILES": source_files_str,
        "PERFORMANCE_METRICS": performance_metrics,
        "PRIMARY_GOAL": primary_goal,
        "SECONDARY_GOAL": secondary_goal
    }

    # Substitute all placeholders in ONE pass over the template. Replacing them
    # one after another would also expand placeholder-looking text that was
    # just inserted (e.g. a "{{PRIMARY_GOAL}}" inside an embedded source file).
    placeholder = re.compile(
        r"\{\{(" + "|".join(re.escape(key) for key in replacements) + r")\}\}"
    )
    return placeholder.sub(lambda match: replacements[match.group(1)], prompt_template)
152 | 
def query_openai(prompt, model="gpt-4"):
    """Send ``prompt`` to the OpenAI chat completions API and return the reply text.

    Args:
        prompt: user message content.
        model: OpenAI model identifier.

    Returns:
        The ``content`` of the first choice in the response.

    Exits the process with status 1 when ``OPENAI_API_KEY`` is not set or the
    HTTP request fails.
    """
    # Look the key up at call time: main() exports a --api_key value to
    # os.environ after this module has been imported, so a constant captured
    # at import would miss it.
    api_key = os.getenv('OPENAI_API_KEY')
    if not api_key:
        print("Error: OPENAI_API_KEY environment variable not set.")
        sys.exit(1)

    url = "https://api.openai.com/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    data = {
        "model": model,
        "messages": [
            {"role": "system", "content": "You are an expert FPGA developer specializing in HLS C++ optimization."},
            {"role": "user", "content": prompt}
        ],
        "temperature": 0.1  # Lower temperature for more deterministic output
    }

    try:
        response = requests.post(url, headers=headers, data=json.dumps(data))
        response.raise_for_status()  # Raise exception for HTTP errors
        return response.json()["choices"][0]["message"]["content"]
    except requests.exceptions.RequestException as e:
        print(f"Error calling OpenAI API: {e}")
        # A requests.Response evaluates to False for 4xx/5xx statuses, so it
        # must be compared against None or the error body is never shown.
        if getattr(e, 'response', None) is not None:
            print(f"Response: {e.response.text}")
        sys.exit(1)
182 | 
def query_claude(prompt, model="claude-3-sonnet-20240229"):
    """Send ``prompt`` to the Anthropic Messages API and return the reply text.

    Args:
        prompt: user message content.
        model: Anthropic model identifier.

    Returns:
        The ``text`` of the first content block in the response.

    Exits the process with status 1 when ``CLAUDE_API_KEY`` is not set or the
    HTTP request fails.
    """
    # Look the key up at call time: main() exports a --api_key value to
    # os.environ after this module has been imported.
    api_key = os.getenv('CLAUDE_API_KEY')
    if not api_key:
        print("Error: CLAUDE_API_KEY environment variable not set.")
        sys.exit(1)

    url = "https://api.anthropic.com/v1/messages"
    headers = {
        "Content-Type": "application/json",
        "x-api-key": api_key,
        "anthropic-version": "2023-06-01"
    }
    data = {
        "model": model,
        # max_tokens is a required field of the Messages API; requests without
        # it are rejected. 4096 is within the output limit of the default model.
        "max_tokens": 4096,
        # Same role instruction the OpenAI and Gemini back ends send.
        "system": "You are an expert FPGA developer specializing in HLS C++ optimization.",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.1
    }

    try:
        response = requests.post(url, headers=headers, json=data)
        response.raise_for_status()
        return response.json()["content"][0]["text"]
    except requests.exceptions.RequestException as e:
        print(f"Error calling Claude API: {e}")
        # A requests.Response evaluates to False for 4xx/5xx statuses, so it
        # must be compared against None or the error body is never shown.
        if getattr(e, 'response', None) is not None:
            print(f"Response: {e.response.text}")
        sys.exit(1)
210 | 
def query_gemini(prompt, model="gemini-2.5-pro-exp-03-25"):
    """Send ``prompt`` to the Google Gemini ``generateContent`` endpoint and return the reply text.

    Args:
        prompt: prompt text; a fixed role instruction is prepended to it.
        model: Gemini model name, inserted into the request URL.

    Returns:
        The ``text`` of the first part of the first candidate in the response.

    Exits the process with status 1 when ``GEMINI_API_KEY`` is not set or the
    HTTP request fails.
    """
    # Look the key up at call time: main() exports a --api_key value to
    # os.environ after this module has been imported.
    api_key = os.getenv('GEMINI_API_KEY')
    if not api_key:
        print("Error: GEMINI_API_KEY environment variable not set.")
        sys.exit(1)

    # The model name is part of the endpoint path.
    url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"

    headers = {
        "Content-Type": "application/json"
    }
    params = {
        "key": api_key
    }
    data = {
        "contents": [{"parts": [{"text": "You are an expert FPGA developer specializing in HLS C++ optimization.\n\n" + prompt}]}],
        "generationConfig": {
            "temperature": 0.1
        }
    }

    try:
        response = requests.post(url, headers=headers, params=params, json=data)
        response.raise_for_status()
        return response.json()["candidates"][0]["content"]["parts"][0]["text"]
    except requests.exceptions.RequestException as e:
        print(f"Error calling Gemini API: {e}")
        # A requests.Response evaluates to False for 4xx/5xx statuses, so it
        # must be compared against None or the error body is never shown.
        if getattr(e, 'response', None) is not None:
            print(f"Response: {e.response.text}")
        sys.exit(1)
242 | 
def query_llm(prompt, model="gemini-2.5-pro-exp-03-25"):
    """Forward ``prompt`` to the back end whose prefix matches ``model``.

    Model names starting with ``gemini``, ``gpt`` or ``claude`` are handled by
    ``query_gemini``, ``query_openai`` and ``query_claude`` respectively, and
    that function's return value is passed through. Any other name prints an
    error and exits the process with status 1.
    """
    backends = (
        ("gemini", query_gemini),
        ("gpt", query_openai),
        ("claude", query_claude),
    )
    for prefix, send in backends:
        if model.startswith(prefix):
            return send(prompt, model)

    print(f"Error: Unsupported model {model}.")
    sys.exit(1)
254 | 
def extract_optimized_code(llm_response):
    """Extract per-file code blocks from an LLM response.

    Three layouts are recognised, where ``<name>`` is a bare file name ending
    in ``.cpp``, ``.hpp`` or ``.h`` and the fence language tag is ``cpp`` or
    ``c++``:

    * a ``### <name>`` heading directly followed by a fenced block;
    * a ``File: <name>`` line directly followed by a fenced block;
    * a fenced block whose first line is a ``// <name>`` comment (the comment
      line itself is not part of the extracted code).

    Args:
        llm_response: full response text.

    Returns:
        dict mapping file name to the stripped code. If the same file name is
        matched more than once, the last match wins.
    """
    optimized_code = {}

    # '.h' is accepted alongside '.cpp'/'.hpp' because the rest of this script
    # (source discovery and prompt building) treats '.h' files as HLS sources.
    filename = r'([a-zA-Z0-9_]+\.(?:[ch]pp|h))'
    fence_open = r'```(?:cpp|c\+\+)'

    filename_patterns = [
        r'###\s+' + filename + r'\s*' + fence_open + r'\s*(.*?)```',
        r'File:\s*' + filename + r'\s*' + fence_open + r'\s*(.*?)```',
        fence_open + r'\s*//\s*' + filename + r'\s*(.*?)```'
    ]

    for pattern in filename_patterns:
        # DOTALL lets the lazy code group span multiple lines up to the closing fence.
        for name, code in re.findall(pattern, llm_response, re.DOTALL):
            optimized_code[name.strip()] = code.strip()

    return optimized_code
272 | 
def apply_optimizations(source_dir, output_dir, optimized_code, llm_response):
    """Write optimized files to disk and record what was done.

    When ``output_dir`` is empty or equal to ``source_dir`` the files are
    updated in place; before that, each existing file about to be replaced is
    copied into ``<source_dir>/backup_original``. Otherwise ``output_dir`` is
    created if needed and no backup is taken.

    Args:
        source_dir: directory holding the original sources.
        output_dir: destination directory, or a falsy value for in-place.
        optimized_code: dict mapping file name to new file content.
        llm_response: raw LLM response, appended to the log.

    Returns:
        Path of the ``optimization_log.md`` written into the output directory.
    """
    in_place = not output_dir or output_dir == source_dir

    if in_place:
        output_dir = source_dir

        # Keep copies of the files that are about to be overwritten.
        backup_dir = os.path.join(source_dir, "backup_original")
        os.makedirs(backup_dir, exist_ok=True)

        for filename in optimized_code:
            original_path = os.path.join(source_dir, filename)
            if not os.path.exists(original_path):
                continue
            backup_path = os.path.join(backup_dir, filename)
            try:
                with open(original_path, 'r') as src, open(backup_path, 'w') as dst:
                    dst.write(src.read())
                print(f"Backed up {filename} to {backup_dir}/")
            except Exception as e:
                print(f"Error backing up {filename}: {e}")
    else:
        os.makedirs(output_dir, exist_ok=True)

    # Write each optimized file; a failure on one file does not stop the rest.
    for filename, code in optimized_code.items():
        target_path = os.path.join(output_dir, filename)
        try:
            with open(target_path, 'w') as out:
                out.write(code)
            print(f"Applied optimization to {filename}")
        except Exception as e:
            print(f"Error writing optimized code to {filename}: {e}")

    # Markdown log: timestamp, list of touched files, then the raw response.
    log_file = os.path.join(output_dir, "optimization_log.md")
    with open(log_file, 'w') as log:
        log.write("# HLS Code Optimization Log\n\n")
        log.write(f"Optimization performed on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
        log.write("## Files Modified\n\n")
        log.writelines(f"- {filename}\n" for filename in optimized_code)
        log.write("\n## Optimizations Applied\n\n")
        log.write(llm_response)

    print(f"Optimization log saved to {log_file}")

    return log_file
321 | 
322 | def main():
323 |     args = parse_arguments()
324 |     
325 |     # Load environment variables for API keys
326 |     load_dotenv()
327 |     
328 |     # Set API key from args or environment variable
329 |     api_key = args.api_key
330 |     if api_key:
331 |         if "gemini" in args.model.lower():
332 |             os.environ['GEMINI_API_KEY'] = api_key
333 |         elif "claude" in args.model.lower():
334 |             os.environ['CLAUDE_API_KEY'] = api_key
335 |         else:
336 |             os.environ['OPENAI_API_KEY'] = api_key
337 |     
338 |     # Find source files
339 |     source_files = find_source_files(args.source_dir)
340 |     if not source_files:
341 |         print(f"Error: No source files found in {args.source_dir}")
342 |         sys.exit(1)
343 |     
344 |     print(f"Found {len(source_files)} source files in {args.source_dir}")
345 |     for name in source_files:
346 |         print(f"  - {name}")
347 |     
348 |     # Extract performance metrics
349 |     performance_metrics_str, metrics = extract_performance_metrics(args.source_dir)
350 |     
351 |     # Set output directory
352 |     output_dir = args.output_dir if args.output_dir else args.source_dir
353 |     
354 |     # Load prompt template
355 |     if args.prompt and os.path.isfile(args.prompt):
356 |         prompt_template = read_file(args.prompt)
357 |     else:
358 |         # Try to find the performance_optimization.md prompt
359 |         script_dir = os.path.dirname(os.path.abspath(__file__))
360 |         project_dir = os.path.dirname(script_dir)
361 |         prompt_path = os.path.join(project_dir, "prompts", "performance_optimization.md")
362 |         
363 |         if os.path.isfile(prompt_path):
364 |             prompt_template = read_file(prompt_path)
365 |         else:
366 |             print("Error: Could not find performance_optimization.md prompt")
367 |             basic_template = """# HLS Performance Optimization
368 | 
369 | Please analyze the provided HLS source code and suggest optimizations to improve performance based on the provided metrics.
370 | 
371 | ## Source Files
372 | {{SOURCE_FILES}}
373 | 
374 | ## Performance Metrics
375 | {{PERFORMANCE_METRICS}}
376 | 
377 | ## Optimization Goals
378 | - Primary goal: {{PRIMARY_GOAL}}
379 | - Secondary goal: {{SECONDARY_GOAL}}
380 | 
381 | Please suggest specific HLS pragmas and code modifications to achieve these goals.
382 | """
383 |             prompt_template = basic_template
384 |             print("Using basic optimization prompt template")
385 |     
386 |     # Create the optimization prompt
387 |     optimization_prompt = create_optimization_prompt(
388 |         source_files, 
389 |         performance_metrics_str, 
390 |         prompt_template, 
391 |         args.primary_goal, 
392 |         args.secondary_goal
393 |     )
394 |     
395 |     # Get component name from directory
396 |     component_name = os.path.basename(os.path.normpath(args.source_dir))
397 |     
398 |     print(f"Generating optimizations for {component_name}...")
399 |     print(f"Using model: {args.model}")
400 |     print(f"Primary goal: {args.primary_goal}")
401 |     print(f"Secondary goal: {args.secondary_goal}")
402 |     
403 |     # Call the LLM
404 |     llm_response = query_llm(optimization_prompt, args.model)
405 |     
406 |     # Extract optimized code
407 |     optimized_code = extract_optimized_code(llm_response)
408 |     
409 |     if not optimized_code:
410 |         print("Warning: No optimized code blocks detected in the LLM response.")
411 |         # Save the full response
412 |         response_path = os.path.join(args.source_dir, "optimization_suggestions.md") 
413 |         with open(response_path, 'w') as f:
414 |             f.write(llm_response)
415 |         print(f"Saved optimization suggestions to {response_path}")
416 |         sys.exit(1)
417 |     
418 |     # Apply optimizations
419 |     log_file = apply_optimizations(args.source_dir, output_dir, optimized_code, llm_response)
420 |     
421 |     print("\nOptimization complete!")
422 |     print(f"Optimized {len(optimized_code)} files")
423 |     print(f"Output directory: {output_dir}")
424 |     print(f"Optimization log: {log_file}")
425 |     
426 |     # Track feedback for prompt improvement
427 |     feedback_file = os.path.join(project_dir, "prompt_feedback.json")
428 |     os.makedirs(os.path.dirname(feedback_file), exist_ok=True)
429 |     
430 |     with open(feedback_file, "a") as f:
431 |         feedback = {
432 |             "prompt": "performance_optimization",
433 |             "timestamp": datetime.now().isoformat(),
434 |             "model": args.model,
435 |             "component": component_name,
436 |             "status": "success" if optimized_code else "partial",
437 |             "files_optimized": len(optimized_code),
438 |             "optimized_files": list(optimized_code.keys())
439 |         }
440 |         f.write(json.dumps(feedback) + "\n")
441 |     
442 |     # Return the output directory for the orchestrator
443 |     print(output_dir)
444 | 
445 | if __name__ == "__main__":
446 |     main()
447 | 


--------------------------------------------------------------------------------
/implementations/peakPicker/llm_response.md:
--------------------------------------------------------------------------------
  1 | Okay, here is the HLS C++ implementation for the `peakPicker` component, following your detailed instructions.
  2 | 
  3 | **Assumptions:**
  4 | 
  5 | 1.  **Fixed-Point Precision:** Since the exact range and required precision of the input `xcorr` and `threshold` signals are not specified, I'll use `ap_fixed<32, 16>` as a starting point. This provides 16 bits for the integer part (including the sign bit) and 16 bits for the fractional part. **This should be adjusted based on actual signal analysis for the specific application.**
  6 | 2.  **Input Data Format:** The input files (`pssCorrMagSq_3_in.txt`, `threshold_in.txt`) contain one floating-point number per line.
  7 | 3.  **Reference Output Format:** The reference file (`locations_3_ref.txt`) contains one integer location index per line.
  8 | 4.  **Maximum Number of Samples:** The testbench assumes the input files contain all samples. The HLS function takes `numSamples` as an argument.
  9 | 
 10 | ---
 11 | 
 12 | **File: `peakPicker.hpp`**
 13 | 
 14 | ```cpp
 15 | #ifndef PEAK_PICKER_HPP
 16 | #define PEAK_PICKER_HPP
 17 | 
 18 | #include <ap_fixed.h>
 19 | #include <hls_stream.h>
 20 | #include <ap_int.h> // For integer types if needed for indices
 21 | 
 22 | //--------------------------------------------------------------------------
 23 | // Constants and Parameters
 24 | //--------------------------------------------------------------------------
 25 | 
 26 | // Fixed-point type configuration (Adjust W and I based on signal analysis)
 27 | constexpr int DATA_W = 32; // Total width
 28 | constexpr int DATA_I = 16; // Integer width (including sign bit)
 29 | 
 30 | // Sliding window configuration (Matches MATLAB reference)
 31 | constexpr int WINDOW_LENGTH = 11;
 32 | constexpr int MIDDLE_LOCATION = WINDOW_LENGTH / 2; // Index 5 (0-based)
 33 | 
 34 | // Index type configuration (Ensure it can hold the maximum sample index)
 35 | constexpr int INDEX_W = 32; // Width for location indices
 36 | 
 37 | //--------------------------------------------------------------------------
 38 | // Type Definitions
 39 | //--------------------------------------------------------------------------
 40 | 
 41 | // Fixed-point type for input data (xcorr and threshold)
 42 | typedef ap_fixed<DATA_W, DATA_I> Data_t;
 43 | 
 44 | // Type for output location indices
 45 | // Using ap_uint for non-negative indices. Use ap_int if indices can be negative.
 46 | // Or simply use 'int' if standard integer sizes are sufficient and synthesizable.
 47 | typedef ap_uint<INDEX_W> Index_t;
 48 | // typedef int Index_t; // Alternative if standard int is sufficient
 49 | 
 50 | //--------------------------------------------------------------------------
 51 | // Function Declaration
 52 | //--------------------------------------------------------------------------
 53 | 
 54 | /**
 55 |  * @brief Finds peaks in a cross-correlation signal using a sliding window.
 56 |  *
 57 |  * @param xcorrStream     Input stream of cross-correlation magnitude squared values.
 58 |  * @param thresholdStream Input stream of threshold values (corresponding to xcorr samples).
 59 |  * @param locationStream  Output stream for detected peak location indices.
 60 |  * @param numSamples      Total number of samples to process from the input streams.
 61 |  */
 62 | void peakPicker(
 63 |     hls::stream<Data_t>& xcorrStream,
 64 |     hls::stream<Data_t>& thresholdStream,
 65 |     hls::stream<Index_t>& locationStream,
 66 |     int numSamples
 67 | );
 68 | 
 69 | #endif // PEAK_PICKER_HPP
 70 | ```
 71 | 
 72 | ---
 73 | 
 74 | **File: `peakPicker.cpp`**
 75 | 
 76 | ```cpp
 77 | #include "peakPicker.hpp"
 78 | 
 79 | /**
 80 |  * @brief Core implementation of the peakPicker algorithm for HLS.
 81 |  *
 82 |  * Implements a sliding window peak detector. A sample is considered a peak if:
 83 |  * 1. It is the maximum value within a window of WINDOW_LENGTH samples centered around it.
 84 |  * 2. Its value exceeds the corresponding threshold value.
 85 |  *
 86 |  * This function processes streaming input data and outputs the indices of detected peaks.
 87 |  */
 88 | void peakPicker(
 89 |     hls::stream<Data_t>& xcorrStream,
 90 |     hls::stream<Data_t>& thresholdStream,
 91 |     hls::stream<Index_t>& locationStream,
 92 |     int numSamples
 93 | ) {
 94 |     // Use function scope for pragmas if applying to the whole function's interface
 95 |     // #pragma HLS INTERFACE axis port=xcorrStream
 96 |     // #pragma HLS INTERFACE axis port=thresholdStream
 97 |     // #pragma HLS INTERFACE axis port=locationStream
 98 |     // #pragma HLS INTERFACE s_axilite port=numSamples bundle=control
 99 |     // #pragma HLS INTERFACE s_axilite port=return bundle=control
100 | 
101 |     // Buffers to hold the current window of data and thresholds
102 |     // These implement the sliding window mechanism.
103 |     Data_t xcorrBuffer[WINDOW_LENGTH];
104 |     Data_t thresholdBuffer[WINDOW_LENGTH];
105 | 
106 |     // Partitioning the arrays allows parallel access to elements within the
107 |     // pipelined loop, mapping them to registers for II=1.
108 |     #pragma HLS ARRAY_PARTITION variable=xcorrBuffer complete dim=1
109 |     #pragma HLS ARRAY_PARTITION variable=thresholdBuffer complete dim=1
110 | 
111 |     // Initialize buffers (optional, but good practice for simulation)
112 |     // Can be skipped if the initial state doesn't affect the first valid output
113 |     for (int i = 0; i < WINDOW_LENGTH; ++i) {
114 |         #pragma HLS UNROLL
115 |         xcorrBuffer[i] = 0;
116 |         thresholdBuffer[i] = 0;
117 |     }
118 | 
119 |     // Main processing loop iterates through all input samples
120 |     // Apply PIPELINE directive for high throughput (initiation interval II=1)
121 |     main_loop:
122 |     for (int i = 0; i < numSamples; ++i) {
123 |         #pragma HLS PIPELINE II=1
124 | 
125 |         // 1. Shift Buffers: Make space for the new sample at index 0
126 |         // Shift existing elements towards the end of the buffer
127 |         shift_loop:
128 |         for (int k = WINDOW_LENGTH - 1; k > 0; --k) {
129 |             #pragma HLS UNROLL // Unroll this small loop for efficiency
130 |             xcorrBuffer[k] = xcorrBuffer[k - 1];
131 |             thresholdBuffer[k] = thresholdBuffer[k - 1];
132 |         }
133 | 
134 |         // 2. Read New Samples: Read from input streams and place at the start (index 0)
135 |         Data_t newXcorrSample = xcorrStream.read();
136 |         Data_t newThresholdSample = thresholdStream.read();
137 |         xcorrBuffer[0] = newXcorrSample;
138 |         thresholdBuffer[0] = newThresholdSample;
139 | 
140 |         // 3. Peak Detection Logic: Start after the window is filled
141 |         // The first potential peak can be checked when i = WINDOW_LENGTH - 1
142 |         // At this point, the sample corresponding to the middle of the *first full window*
143 |         // is located at xcorrBuffer[MIDDLE_LOCATION]. This sample was originally read
144 |         // at iteration i = (WINDOW_LENGTH - 1) - MIDDLE_LOCATION.
145 |         if (i >= WINDOW_LENGTH - 1) {
146 |             // Get the sample at the center of the current window
147 |             Data_t middleSample = xcorrBuffer[MIDDLE_LOCATION];
148 |             Data_t middleThreshold = thresholdBuffer[MIDDLE_LOCATION];
149 | 
150 |             // Check if the middle sample is the maximum in the window
151 |             bool isPeak = true; // Assume it's a peak initially
152 |             compare_loop:
153 |             for (int k = 0; k < WINDOW_LENGTH; ++k) {
154 |                  #pragma HLS UNROLL // Unroll comparison loop for parallelism
155 |                 // Check if any other sample in the window is strictly greater
156 |                 // Note: MATLAB's `all(CompareOut <= 0)` means middleSample >= all others.
157 |                 if (k != MIDDLE_LOCATION && xcorrBuffer[k] > middleSample) {
158 |                     isPeak = false;
159 |                     break; // Exit loop early if not a peak
160 |                 }
161 |             }
162 | 
163 |             // Check if the peak condition is met (local maximum AND above threshold)
164 |             if (isPeak && (middleSample > middleThreshold)) {
165 |                 // Calculate the index of the peak in the original input stream
166 |                 // The sample currently at xcorrBuffer[MIDDLE_LOCATION] was read
167 |                 // MIDDLE_LOCATION iterations ago.
168 |                 Index_t peakLocation = i - MIDDLE_LOCATION;
169 | 
170 |                 // Write the detected peak location to the output stream
171 |                 locationStream.write(peakLocation);
172 |             }
173 |         }
174 |     } // end main_loop
175 | }
176 | ```
177 | 
178 | ---
179 | 
180 | **File: `peakPicker_tb.cpp`**
181 | 
182 | ```cpp
183 | #include "peakPicker.hpp"
184 | #include <iostream>
185 | #include <fstream>
186 | #include <vector>
187 | #include <string>
188 | #include <cmath>    // For std::abs
189 | #include <limits>   // For numeric_limits
190 | #include <iomanip>  // For std::setprecision
191 | 
192 | // Define input/output file names
193 | const std::string XCORR_INPUT_FILE = "pssCorrMagSq_3_in.txt"; // Matches MATLAB TB
194 | const std::string THRESHOLD_INPUT_FILE = "threshold_in.txt"; // Matches MATLAB TB
195 | const std::string REF_OUTPUT_FILE = "locations_3_ref.txt";   // Matches MATLAB TB
196 | // const std::string REF_OUTPUT_FILE = "peakLocs_out.txt"; // Use this if comparing against MATLAB output file
197 | 
198 | // Function to read data from a file into a vector of doubles
199 | bool readDataFile(const std::string& filename, std::vector<double>& data) {
200 |     std::ifstream infile(filename);
201 |     if (!infile.is_open()) {
202 |         std::cerr << "Error: Could not open file: " << filename << std::endl;
203 |         return false;
204 |     }
205 |     double value;
206 |     while (infile >> value) {
207 |         data.push_back(value);
208 |     }
209 |     if (infile.bad()) {
210 |         std::cerr << "Error: Failed reading data from file: " << filename << std::endl;
211 |         infile.close();
212 |         return false;
213 |     }
214 |     infile.close();
215 |     if (data.empty()) {
216 |        std::cerr << "Warning: No data read from file: " << filename << std::endl;
217 |        // Decide if this is an error or acceptable
218 |     }
219 |     std::cout << "Read " << data.size() << " values from " << filename << std::endl;
220 |     return true;
221 | }
222 | 
223 | // Function to read integer data (locations) from a file
224 | bool readIntDataFile(const std::string& filename, std::vector<int>& data) {
225 |     std::ifstream infile(filename);
226 |     if (!infile.is_open()) {
227 |         std::cerr << "Error: Could not open file: " << filename << std::endl;
228 |         return false;
229 |     }
230 |     int value;
231 |     while (infile >> value) {
232 |         data.push_back(value);
233 |     }
234 |      if (infile.bad()) {
235 |         std::cerr << "Error: Failed reading data from file: " << filename << std::endl;
236 |         infile.close();
237 |         return false;
238 |     }
239 |     infile.close();
240 |      if (data.empty()) {
241 |        std::cerr << "Warning: No data read from file: " << filename << std::endl;
242 |        // Decide if this is an error or acceptable
243 |     }
244 |     std::cout << "Read " << data.size() << " values from " << filename << std::endl;
245 |     return true;
246 | }
247 | 
248 | 
249 | int main() {
250 |     std::cout << "--- Starting Peak Picker Testbench ---" << std::endl;
251 | 
252 |     // --- Data Loading ---
253 |     std::vector<double> xcorrVec, thresholdVec;
254 |     std::vector<int> refLocsVec; // Use int for reference locations
255 | 
256 |     std::cout << "Loading input data..." << std::endl;
257 |     if (!readDataFile(XCORR_INPUT_FILE, xcorrVec)) return 1;
258 |     if (!readDataFile(THRESHOLD_INPUT_FILE, thresholdVec)) return 1;
259 | 
260 |     std::cout << "Loading reference output data..." << std::endl;
261 |     if (!readIntDataFile(REF_OUTPUT_FILE, refLocsVec)) return 1;
262 | 
263 |     // Basic input validation
264 |     if (xcorrVec.size() != thresholdVec.size()) {
265 |         std::cerr << "Error: Input xcorr size (" << xcorrVec.size()
266 |                   << ") does not match threshold size (" << thresholdVec.size() << ")" << std::endl;
267 |         return 1;
268 |     }
269 |     if (xcorrVec.empty()) {
270 |          std::cerr << "Error: Input data vectors are empty." << std::endl;
271 |          return 1;
272 |     }
273 | 
274 |     int numSamples = xcorrVec.size();
275 |     std::cout << "Number of samples to process: " << numSamples << std::endl;
276 | 
277 |     // --- Stream Preparation ---
278 |     hls::stream<Data_t> xcorrStream("xcorrStream");
279 |     hls::stream<Data_t> thresholdStream("thresholdStream");
280 |     hls::stream<Index_t> locationStream("locationStream");
281 | 
282 |     std::cout << "Populating input streams..." << std::endl;
283 |     for (int i = 0; i < numSamples; ++i) {
284 |         // Convert double to fixed-point Data_t
285 |         xcorrStream.write(static_cast<Data_t>(xcorrVec[i]));
286 |         thresholdStream.write(static_cast<Data_t>(thresholdVec[i]));
287 |     }
288 |     std::cout << "Input streams populated." << std::endl;
289 | 
290 |     // --- Call the DUT (Device Under Test) ---
291 |     std::cout << "Calling HLS peakPicker function..." << std::endl;
292 |     peakPicker(xcorrStream, thresholdStream, locationStream, numSamples);
293 |     std::cout << "HLS peakPicker function finished." << std::endl;
294 | 
295 |     // --- Collect Results ---
296 |     std::vector<Index_t> actualLocsVec;
297 |     std::cout << "Collecting results from output stream..." << std::endl;
298 |     while (!locationStream.empty()) {
299 |         actualLocsVec.push_back(locationStream.read());
300 |     }
301 |      std::cout << "Collected " << actualLocsVec.size() << " peak locations." << std::endl;
302 | 
303 |     // --- Verification ---
304 |     std::cout << "Comparing HLS results with reference..." << std::endl;
305 |     bool match = true;
306 |     int errorCount = 0;
307 | 
308 |     if (actualLocsVec.size() != refLocsVec.size()) {
309 |         std::cerr << "Error: Mismatch in number of detected peaks!" << std::endl;
310 |         std::cerr << "  Expected: " << refLocsVec.size() << std::endl;
311 |         std::cerr << "  Actual:   " << actualLocsVec.size() << std::endl;
312 |         match = false;
313 |     } else {
314 |         std::cout << "Number of peaks matches reference (" << refLocsVec.size() << ")." << std::endl;
315 |         for (size_t i = 0; i < refLocsVec.size(); ++i) {
316 |             // Direct comparison for integer indices
317 |             // Convert Index_t (ap_uint) to int for comparison if necessary
318 |             int actualLoc = static_cast<int>(actualLocsVec[i]);
319 |             int refLoc = refLocsVec[i];
320 | 
321 |             if (actualLoc != refLoc) {
322 |                 if (errorCount < 10) { // Print first few mismatches
323 |                    std::cerr << "Mismatch at index " << i << ": Expected = " << refLoc
324 |                              << ", Actual = " << actualLoc << std::endl;
325 |                 }
326 |                 match = false;
327 |                 errorCount++;
328 |             }
329 |         }
330 |          if (errorCount > 0) {
331 |              std::cerr << "Total mismatches found: " << errorCount << std::endl;
332 |          }
333 |     }
334 | 
335 |     // --- Report Results ---
336 |     if (match) {
337 |         std::cout << "--- Test PASSED ---" << std::endl;
338 |         std::cout << "HLS implementation output matches the reference output." << std::endl;
339 |         return 0; // Success
340 |     } else {
341 |         std::cout << "--- Test FAILED ---" << std::endl;
342 |         std::cout << "HLS implementation output does NOT match the reference output." << std::endl;
343 |         return 1; // Failure
344 |     }
345 | }
346 | ```
347 | 
348 | ---
349 | 
350 | **Description of Optimization Approaches and Impact:**
351 | 
352 | 1.  **`#pragma HLS PIPELINE II=1`:** Applied to the `main_loop`. This is the most critical optimization for throughput. It instructs HLS to attempt initiating a new loop iteration every clock cycle. This significantly increases the processing rate, allowing the design to handle high-speed streaming data. Without it, the loop latency would dictate the throughput, which would be much lower.
353 | 2.  **`#pragma HLS ARRAY_PARTITION variable=... complete dim=1`:** Applied to `xcorrBuffer` and `thresholdBuffer`. Since the `PIPELINE` pragma requires concurrent access to multiple elements of these buffers within a single clock cycle (for shifting and comparison), partitioning them completely breaks them down into individual registers. This eliminates memory access bottlenecks and allows the `II=1` target to be met. For small arrays like these (size 11), complete partitioning is feasible and efficient in terms of register usage vs. the performance gain. If the window were much larger, other partitioning schemes (e.g., `cyclic`, `block`) or mapping to BRAM/URAM might be considered to balance resources.
354 | 3.  **`#pragma HLS UNROLL`:** Applied to the small inner loops (`shift_loop`, `compare_loop`). This duplicates the loop body's hardware, allowing all operations within those loops to execute in parallel within a single iteration of the pipelined outer loop. This helps reduce the latency of the outer loop iteration and supports achieving `II=1`.
355 | 4.  **`hls::stream` Interface:** Using streams enables efficient data transfer between HLS blocks and external interfaces on the FPGA. Streams naturally fit pipelined and dataflow architectures, preventing large intermediate buffers and allowing continuous data processing.
356 | 5.  **Fixed-Point Arithmetic (`ap_fixed`)**: While primarily a functional requirement for bit-accuracy and hardware efficiency, using fixed-point types instead of floating-point significantly reduces resource usage (LUTs, FFs, DSPs) on the FPGA, making the design smaller, faster, and lower power. The choice of `W` and `I` directly impacts resource usage and numerical accuracy.
357 | 6.  **Dataflow (`#pragma HLS DATAFLOW`)**: *Not explicitly used* in this specific `peakPicker` function because the internal structure is a single, tightly coupled loop. Dataflow is more beneficial when a function can be broken down into distinct sequential tasks (e.g., TaskA -> TaskB -> TaskC) that can operate concurrently on different sets of data, communicating via streams or FIFOs. If `peakPicker` were part of a larger chain of processing blocks within a top-level function, `DATAFLOW` would be applied at that higher level to pipeline the execution of `peakPicker` relative to the other blocks.
358 | 
359 | **Impact Summary:**
360 | 
361 | *   **Throughput:** Significantly increased due to `PIPELINE II=1`. The design can process one input sample per clock cycle after the initial pipeline fill latency.
362 | *   **Latency:** The latency to produce the *first* output peak location is roughly `WINDOW_LENGTH` cycles (to fill the buffer) plus the pipeline depth of the main loop. The latency between subsequent outputs depends on the peak spacing but can be as low as 1 cycle if peaks occur consecutively.
363 | *   **Resource Usage:**
364 |     *   `ARRAY_PARTITION complete` increases register usage but avoids BRAM.
365 |     *   `PIPELINE` and `UNROLL` increase logic usage (LUTs/FFs) to achieve parallelism.
366 |     *   Using `ap_fixed` is much more resource-efficient than floating-point. The specific `W` and `I` values determine the exact resource cost.
367 | 
368 | This implementation provides a high-throughput, functionally correct HLS version of the MATLAB peak picker, ready for synthesis and integration into an FPGA design. Remember to validate and potentially adjust the `Data_t` fixed-point type based on your specific signal characteristics.


--------------------------------------------------------------------------------
/agent_log.txt:
--------------------------------------------------------------------------------
  1 | 2025-04-06 14:59:08,864 [INFO] Prompt manager initialized with directory: /home/jielei/Projects/UTS/llm-fpga-design/prompts
  2 | 2025-04-06 14:59:08,864 [INFO] Registered agent: code_generator
  3 | 2025-04-06 14:59:08,864 [INFO] Registered agent: debug_assistant
  4 | 2025-04-06 14:59:08,864 [INFO] Registered agent: makefile_generator
  5 | 2025-04-06 14:59:08,864 [INFO] Registered agent: performance_optimizer
  6 | 2025-04-06 14:59:08,864 [INFO] Registered agent: hls_builder
  7 | 2025-04-06 14:59:08,864 [INFO] Registered agent: documentation_generator
  8 | 2025-04-06 14:59:08,864 [INFO] Workflow defined with 9 steps
  9 | 2025-04-06 14:59:08,864 [INFO] Starting workflow execution
 10 | 2025-04-06 14:59:08,865 [INFO] Executing workflow step: generate_code
 11 | 2025-04-06 14:59:08,865 [INFO] Running agent code_generator for step generate_code
 12 | 2025-04-06 14:59:08,865 [INFO] Agent code_generator status: running
 13 | 2025-04-06 14:59:08,865 [INFO] Running command: /home/jielei/Projects/UTS/llm-fpga-design/.venv/bin/python3 /home/jielei/Projects/UTS/llm-fpga-design/scripts/generate_hls_code.py --matlab_file algorithms/peakPicker.m algorithms/peakPicker_tb.m --prompt /home/jielei/Projects/UTS/llm-fpga-design/prompts/hls_generation.md --output_dir /home/jielei/Projects/UTS/llm-fpga-design/implementations --model gemini-2.5-pro-exp-03-25
 14 | 2025-04-06 15:00:21,531 [INFO] Agent code_generator status: success
 15 | 2025-04-06 15:00:21,532 [INFO] Executing workflow step: generate_makefile
 16 | 2025-04-06 15:00:21,532 [INFO] Running agent makefile_generator for step generate_makefile
 17 | 2025-04-06 15:00:21,532 [INFO] Agent makefile_generator status: running
 18 | 2025-04-06 15:00:21,532 [INFO] Generating Makefile for peakPicker in /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker
 19 | 2025-04-06 15:00:21,532 [INFO] Generated Makefile from template at /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/Makefile
 20 | 2025-04-06 15:00:21,532 [INFO] Agent makefile_generator status: success
 21 | 2025-04-06 15:00:21,532 [INFO] Executing workflow step: build_csim
 22 | 2025-04-06 15:00:21,532 [INFO] Running agent hls_builder for step build_csim
 23 | 2025-04-06 15:00:21,532 [INFO] Agent hls_builder status: running
 24 | 2025-04-06 15:00:21,533 [INFO] Running make in /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker: make csim
 25 | 2025-04-06 15:00:28,586 [ERROR] Agent hls_builder: Make failed with return code 2
 26 | 2025-04-06 15:00:28,586 [INFO] Agent hls_builder status: failed - Make failed with return code 2
 27 | 2025-04-06 15:00:28,586 [INFO] Starting debug cycle
 28 | 2025-04-06 15:00:28,586 [INFO] Executing workflow step: debug_errors
 29 | 2025-04-06 15:00:28,586 [INFO] Running agent debug_assistant for step debug_errors
 30 | 2025-04-06 15:00:28,586 [INFO] Agent debug_assistant status: running
 31 | 2025-04-06 15:00:28,586 [INFO] Running command: /home/jielei/Projects/UTS/llm-fpga-design/.venv/bin/python3 /home/jielei/Projects/UTS/llm-fpga-design/scripts/debug_assistant.py --error_log /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/proj_peakPicker/solution1/csim/report/peakPicker_csim.log --source_file /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/peakPicker.hpp /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/peakPicker.cpp /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/peakPicker_tb.cpp --model gemini-2.5-pro-exp-03-25
 32 | 2025-04-06 15:02:12,412 [INFO] Agent debug_assistant status: success
 33 | 2025-04-06 15:02:12,412 [INFO] Executing workflow step: build_csim
 34 | 2025-04-06 15:02:12,412 [INFO] Running agent hls_builder for step build_csim
 35 | 2025-04-06 15:02:12,412 [INFO] Agent hls_builder status: running
 36 | 2025-04-06 15:02:12,413 [INFO] Running make in /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker: make csim
 37 | 2025-04-06 15:02:19,744 [INFO] Agent hls_builder status: success
 38 | 2025-04-06 15:02:19,744 [INFO] Debug cycle completed successfully, continuing workflow
 39 | 2025-04-06 15:02:19,744 [INFO] Executing workflow step: build_csynth
 40 | 2025-04-06 15:02:19,744 [INFO] Running agent hls_builder for step build_csynth
 41 | 2025-04-06 15:02:19,744 [INFO] Agent hls_builder status: running
 42 | 2025-04-06 15:02:19,744 [INFO] Running make in /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker: make csynth
 43 | 2025-04-06 15:02:34,838 [INFO] Agent hls_builder status: success
 44 | 2025-04-06 15:02:34,839 [INFO] Executing workflow step: build_cosim
 45 | 2025-04-06 15:02:34,839 [INFO] Running agent hls_builder for step build_cosim
 46 | 2025-04-06 15:02:34,839 [INFO] Agent hls_builder status: running
 47 | 2025-04-06 15:02:34,839 [INFO] Running make in /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker: make cosim
 48 | 2025-04-06 15:03:00,392 [INFO] Agent hls_builder status: success
 49 | 2025-04-06 15:03:00,392 [INFO] Executing workflow step: export_ip
 50 | 2025-04-06 15:03:00,392 [INFO] Running agent hls_builder for step export_ip
 51 | 2025-04-06 15:03:00,392 [INFO] Agent hls_builder status: running
 52 | 2025-04-06 15:03:00,392 [INFO] Running make in /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker: make export_ip
 53 | 2025-04-06 15:03:18,971 [INFO] Agent hls_builder status: success
 54 | 2025-04-06 15:03:18,971 [INFO] Executing workflow step: build_impl
 55 | 2025-04-06 15:03:18,971 [INFO] Running agent hls_builder for step build_impl
 56 | 2025-04-06 15:03:18,972 [INFO] Agent hls_builder status: running
 57 | 2025-04-06 15:03:18,972 [INFO] Running make in /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker: make impl
 58 | 2025-04-06 15:06:56,841 [ERROR] Agent hls_builder: INFO: [Place 46-56] BUFG insertion identified 0 candidate nets. Inserted BUFG: 0, Replicated BUFG Driver: 0, Skipped due to Placement/Routing Conflicts: 0, Skipped due to Timing Degradation: 0, Skipped due to netlist editing failed: 0.
 59 | Number of Failed Nets               = 616
 60 | (Failed Nets is the sum of unrouted and partially routed nets)
 61 | Number of Failed Nets               = 0
 62 | (Failed Nets is the sum of unrouted and partially routed nets)
 63 | 2025-04-06 15:06:56,842 [INFO] Agent hls_builder status: failed - INFO: [Place 46-56] BUFG insertion identified 0 candidate nets. Inserted BUFG: 0, Replicated BUFG Driver: 0, Skipped due to Placement/Routing Conflicts: 0, Skipped due to Timing Degradation: 0, Skipped due to netlist editing failed: 0.
 64 | Number of Failed Nets               = 616
 65 | (Failed Nets is the sum of unrouted and partially routed nets)
 66 | Number of Failed Nets               = 0
 67 | (Failed Nets is the sum of unrouted and partially routed nets)
 68 | 2025-04-06 15:06:56,842 [INFO] Encountered error, running error handling: generate_documentation
 69 | 2025-04-06 15:06:56,842 [INFO] Executing workflow step: generate_documentation
 70 | 2025-04-06 15:06:56,842 [INFO] Running agent documentation_generator for step generate_documentation
 71 | 2025-04-06 15:06:56,842 [INFO] Agent documentation_generator status: running
 72 | 2025-04-06 15:06:56,842 [INFO] Analyzing performance reports...
 73 | 2025-04-06 15:06:57,128 [INFO] Analyzing LLM responses for insights...
 74 | 2025-04-06 15:06:57,128 [INFO] Analyzing code generation LLM response: /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/llm_response.md
 75 | 2025-04-06 15:06:57,131 [INFO] Analyzing debug reports in: /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/debug_reports
 76 | 2025-04-06 15:06:57,137 [INFO] Collecting workflow execution data...
 77 | 2025-04-06 15:06:57,137 [INFO] Generating documentation...
 78 | 2025-04-06 15:06:57,138 [INFO] Calling LLM with model gemini-2.5-pro-exp-03-25 to generate documentation...
 79 | 2025-04-06 15:10:16,909 [INFO] Agent documentation_generator status: success
 80 | 2025-04-06 15:10:16,910 [INFO] Workflow completed with status: success
 81 | 2025-04-06 15:10:16,910 [INFO] Agent workflow completed successfully
 82 | 2025-04-06 15:38:52,946 [INFO] Prompt manager initialized with directory: /home/jielei/Projects/UTS/llm-fpga-design/prompts
 83 | 2025-04-06 15:38:52,946 [INFO] Registered agent: code_generator
 84 | 2025-04-06 15:38:52,946 [INFO] Registered agent: debug_assistant
 85 | 2025-04-06 15:38:52,947 [INFO] Registered agent: makefile_generator
 86 | 2025-04-06 15:38:52,947 [INFO] Registered agent: performance_optimizer
 87 | 2025-04-06 15:38:52,947 [INFO] Registered agent: hls_builder
 88 | 2025-04-06 15:38:52,947 [INFO] Registered agent: documentation_generator
 89 | 2025-04-06 15:38:52,947 [INFO] Agent documentation_generator status: running
 90 | 2025-04-06 15:38:52,947 [INFO] Analyzing performance reports...
 91 | 2025-04-06 15:38:53,187 [INFO] Analyzing LLM responses for insights...
 92 | 2025-04-06 15:38:53,187 [INFO] Analyzing code generation LLM response: /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/llm_response.md
 93 | 2025-04-06 15:38:53,190 [INFO] Analyzing debug reports in: /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/debug_reports
 94 | 2025-04-06 15:38:53,196 [INFO] Collecting workflow execution data...
 95 | 2025-04-06 15:38:53,196 [INFO] Generating documentation...
 96 | 2025-04-06 15:38:53,197 [INFO] Calling LLM with model gemini-2.5-pro-exp-03-25 to generate documentation...
 97 | 2025-04-06 15:41:50,339 [INFO] Agent documentation_generator status: success
 98 | 2025-04-06 19:26:35,235 [INFO] Prompt manager initialized with directory: /home/jielei/Projects/UTS/llm-fpga-design/prompts
 99 | 2025-04-06 19:26:35,235 [INFO] Registered agent: code_generator
100 | 2025-04-06 19:26:35,236 [INFO] Registered agent: debug_assistant
101 | 2025-04-06 19:26:35,236 [INFO] Registered agent: makefile_generator
102 | 2025-04-06 19:26:35,236 [INFO] Registered agent: performance_optimizer
103 | 2025-04-06 19:26:35,236 [INFO] Registered agent: hls_builder
104 | 2025-04-06 19:26:35,236 [INFO] Registered agent: documentation_generator
105 | 2025-04-06 19:26:35,236 [INFO] Agent documentation_generator status: running
106 | 2025-04-06 19:26:35,236 [INFO] Analyzing performance reports...
107 | 2025-04-06 19:26:35,498 [INFO] Generated performance report at /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/performance_metrics.md
108 | 2025-04-06 19:26:35,498 [INFO] Analyzing LLM responses for insights...
109 | 2025-04-06 19:26:35,498 [INFO] Analyzing code generation LLM response: /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/llm_response.md
110 | 2025-04-06 19:26:35,501 [INFO] Analyzing debug reports in: /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/debug_reports
111 | 2025-04-06 19:26:35,507 [INFO] Collecting workflow execution data...
112 | 2025-04-06 19:26:35,508 [INFO] Generating documentation...
113 | 2025-04-06 19:26:35,508 [INFO] Calling LLM with model gemini-2.5-pro-exp-03-25 to generate documentation...
114 | 2025-04-06 19:29:54,444 [INFO] Agent documentation_generator status: success
115 | 2025-04-06 20:13:46,400 [INFO] Prompt manager initialized with directory: /home/jielei/Projects/UTS/llm-fpga-design/prompts
116 | 2025-04-06 20:13:46,400 [INFO] Registered agent: code_generator
117 | 2025-04-06 20:13:46,400 [INFO] Registered agent: debug_assistant
118 | 2025-04-06 20:13:46,400 [INFO] Registered agent: makefile_generator
119 | 2025-04-06 20:13:46,400 [INFO] Registered agent: performance_optimizer
120 | 2025-04-06 20:13:46,400 [INFO] Registered agent: hls_builder
121 | 2025-04-06 20:13:46,400 [INFO] Registered agent: documentation_generator
122 | 2025-04-06 20:13:46,400 [INFO] Agent documentation_generator status: running
123 | 2025-04-06 20:13:46,400 [INFO] Analyzing performance reports...
124 | 2025-04-06 20:13:46,635 [ERROR] Error generating performance report: Unknown format code 'f' for object of type 'str'
125 | 2025-04-06 20:13:46,635 [INFO] Analyzing LLM responses for insights...
126 | 2025-04-06 20:13:46,635 [INFO] Analyzing code generation LLM response: /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/llm_response.md
127 | 2025-04-06 20:13:46,638 [INFO] Analyzing debug reports in: /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/debug_reports
128 | 2025-04-06 20:13:46,644 [INFO] Collecting workflow execution data...
129 | 2025-04-06 20:13:46,644 [INFO] Generating documentation...
130 | 2025-04-06 20:13:46,645 [INFO] Calling LLM with model gemini-2.5-pro-exp-03-25 to generate documentation...
131 | 2025-04-06 20:22:38,283 [INFO] Prompt manager initialized with directory: /home/jielei/Projects/UTS/llm-fpga-design/prompts
132 | 2025-04-06 20:22:38,283 [INFO] Registered agent: code_generator
133 | 2025-04-06 20:22:38,283 [INFO] Registered agent: debug_assistant
134 | 2025-04-06 20:22:38,283 [INFO] Registered agent: makefile_generator
135 | 2025-04-06 20:22:38,283 [INFO] Registered agent: performance_optimizer
136 | 2025-04-06 20:22:38,283 [INFO] Registered agent: hls_builder
137 | 2025-04-06 20:22:38,283 [INFO] Registered agent: documentation_generator
138 | 2025-04-06 20:22:38,284 [INFO] Agent documentation_generator status: running
139 | 2025-04-06 20:22:38,284 [INFO] Analyzing performance reports...
140 | 2025-04-06 20:22:38,547 [INFO] Generated performance report at /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/performance_metrics.md
141 | 2025-04-06 20:22:38,547 [INFO] Analyzing LLM responses for insights...
142 | 2025-04-06 20:22:38,547 [INFO] Analyzing code generation LLM response: /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/llm_response.md
143 | 2025-04-06 20:22:38,550 [INFO] Analyzing debug reports in: /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/debug_reports
144 | 2025-04-06 20:22:38,557 [INFO] Collecting workflow execution data...
145 | 2025-04-06 20:22:38,557 [INFO] Generating documentation...
146 | 2025-04-06 20:22:38,557 [INFO] Calling LLM with model gemini-2.5-pro-exp-03-25 to generate documentation...
147 | 2025-04-06 20:26:01,193 [INFO] Agent documentation_generator status: success
148 | 2025-04-06 20:55:53,924 [INFO] Prompt manager initialized with directory: /home/jielei/Projects/UTS/llm-fpga-design/prompts
149 | 2025-04-06 20:55:53,924 [INFO] Registered agent: code_generator
150 | 2025-04-06 20:55:53,924 [INFO] Registered agent: debug_assistant
151 | 2025-04-06 20:55:53,924 [INFO] Registered agent: makefile_generator
152 | 2025-04-06 20:55:53,924 [INFO] Registered agent: performance_optimizer
153 | 2025-04-06 20:55:53,924 [INFO] Registered agent: hls_builder
154 | 2025-04-06 20:55:53,924 [INFO] Registered agent: documentation_generator
155 | 2025-04-06 20:55:53,924 [INFO] Agent documentation_generator status: running
156 | 2025-04-06 20:55:53,924 [INFO] Analyzing performance reports...
157 | 2025-04-06 20:55:54,201 [INFO] Generated performance report at /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/performance_metrics.md
158 | 2025-04-06 20:55:54,201 [INFO] Analyzing LLM responses for insights...
159 | 2025-04-06 20:55:54,201 [INFO] Analyzing code generation LLM response: /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/llm_response.md
160 | 2025-04-06 20:55:54,204 [INFO] Analyzing debug reports in: /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/debug_reports
161 | 2025-04-06 20:55:54,210 [INFO] Collecting workflow execution data...
162 | 2025-04-06 20:55:54,210 [INFO] Generating documentation...
163 | 2025-04-06 20:55:54,211 [INFO] Calling LLM with model gemini-2.5-pro-exp-03-25 to generate documentation...
164 | 2025-04-06 20:59:21,513 [INFO] Agent documentation_generator status: success
165 | 2025-04-06 21:21:18,615 [INFO] Prompt manager initialized with directory: /home/jielei/Projects/UTS/llm-fpga-design/prompts
166 | 2025-04-06 21:21:18,615 [INFO] Registered agent: code_generator
167 | 2025-04-06 21:21:18,615 [INFO] Registered agent: debug_assistant
168 | 2025-04-06 21:21:18,615 [INFO] Registered agent: makefile_generator
169 | 2025-04-06 21:21:18,616 [INFO] Registered agent: performance_optimizer
170 | 2025-04-06 21:21:18,616 [INFO] Registered agent: hls_builder
171 | 2025-04-06 21:21:18,616 [INFO] Registered agent: documentation_generator
172 | 2025-04-06 21:21:18,617 [INFO] Agent documentation_generator status: running
173 | 2025-04-06 21:21:18,617 [INFO] Analyzing performance reports...
174 | 2025-04-06 21:21:18,895 [INFO] Generated performance report at /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/performance_metrics.md
175 | 2025-04-06 21:21:18,895 [INFO] Analyzing LLM responses for insights...
176 | 2025-04-06 21:21:18,895 [INFO] Analyzing code generation LLM response: /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/llm_response.md
177 | 2025-04-06 21:21:18,898 [INFO] Analyzing debug reports in: /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/debug_reports
178 | 2025-04-06 21:21:18,904 [INFO] Collecting workflow execution data...
179 | 2025-04-06 21:21:18,904 [INFO] Generating documentation...
180 | 2025-04-06 21:21:18,905 [INFO] Calling LLM with model gemini-2.5-pro-exp-03-25 to generate documentation...
181 | 2025-04-06 21:24:27,350 [INFO] Agent documentation_generator status: success
182 | 2025-04-06 21:56:03,569 [INFO] Prompt manager initialized with directory: /home/jielei/Projects/UTS/llm-fpga-design/prompts
183 | 2025-04-06 21:56:03,570 [INFO] Registered agent: code_generator
184 | 2025-04-06 21:56:03,570 [INFO] Registered agent: debug_assistant
185 | 2025-04-06 21:56:03,570 [INFO] Registered agent: makefile_generator
186 | 2025-04-06 21:56:03,570 [INFO] Registered agent: performance_optimizer
187 | 2025-04-06 21:56:03,570 [INFO] Registered agent: hls_builder
188 | 2025-04-06 21:56:03,570 [INFO] Registered agent: documentation_generator
189 | 2025-04-06 21:56:03,570 [INFO] Agent documentation_generator status: running
190 | 2025-04-06 21:56:03,570 [INFO] Analyzing performance reports...
191 | 2025-04-06 21:56:03,843 [INFO] Generated performance report at /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/performance_metrics.md
192 | 2025-04-06 21:56:03,843 [INFO] Analyzing LLM responses for insights...
193 | 2025-04-06 21:56:03,843 [INFO] Analyzing code generation LLM response: /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/llm_response.md
194 | 2025-04-06 21:56:03,846 [INFO] Analyzing debug reports in: /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/debug_reports
195 | 2025-04-06 21:56:03,852 [INFO] Collecting workflow execution data...
196 | 2025-04-06 21:56:03,852 [INFO] Generating documentation...
197 | 2025-04-06 21:56:03,853 [INFO] Calling LLM with model gemini-2.5-pro-exp-03-25 to generate documentation...
198 | 2025-04-06 21:59:23,375 [INFO] Agent documentation_generator status: success
199 | 2025-04-06 22:18:02,921 [INFO] Prompt manager initialized with directory: /home/jielei/Projects/UTS/llm-fpga-design/prompts
200 | 2025-04-06 22:18:02,921 [INFO] Registered agent: code_generator
201 | 2025-04-06 22:18:02,922 [INFO] Registered agent: debug_assistant
202 | 2025-04-06 22:18:02,922 [INFO] Registered agent: makefile_generator
203 | 2025-04-06 22:18:02,922 [INFO] Registered agent: performance_optimizer
204 | 2025-04-06 22:18:02,922 [INFO] Registered agent: hls_builder
205 | 2025-04-06 22:18:02,922 [INFO] Registered agent: documentation_generator
206 | 2025-04-06 22:18:02,922 [INFO] Agent documentation_generator status: running
207 | 2025-04-06 22:18:02,922 [INFO] Analyzing performance reports...
208 | 2025-04-06 22:18:03,197 [INFO] Generated performance report at /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/performance_metrics.md
209 | 2025-04-06 22:18:03,197 [INFO] Analyzing LLM responses for insights...
210 | 2025-04-06 22:18:03,198 [INFO] Analyzing code generation LLM response: /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/llm_response.md
211 | 2025-04-06 22:18:03,200 [INFO] Analyzing debug reports in: /home/jielei/Projects/UTS/llm-fpga-design/implementations/peakPicker/debug_reports
212 | 2025-04-06 22:18:03,207 [INFO] Collecting workflow execution data...
213 | 2025-04-06 22:18:03,207 [INFO] Generating documentation...
214 | 2025-04-06 22:18:03,207 [INFO] Calling LLM with model gemini-2.5-pro-exp-03-25 to generate documentation...
215 | 2025-04-06 22:21:23,550 [INFO] Agent documentation_generator status: success
216 | 


--------------------------------------------------------------------------------