├── .gitignore ├── docs ├── sdft.png ├── overview.png ├── fft_vs_sdft.png └── fpga_usage.md ├── tests ├── twiddle_rom_tb.v ├── top_tb_header.vh ├── gtk-twiddle_rom.gtkw ├── gtk-complex_mult.gtkw ├── gtk-freq_bram.gtkw ├── localparams.vh ├── complex_mult_tb.v ├── gtk-sdft.gtkw ├── top_tb.v ├── sdft_tb.v ├── freq_bram_tb.v └── gtk-top.gtkw ├── hdl ├── icestick.pcf ├── complex_mult.v ├── twiddle_rom.v ├── freq_bram.v ├── 8k.pcf ├── VgaSyncGen.v ├── top.v └── sdft.v ├── python ├── parse_verilog_header.py ├── multiply_complex.py ├── gen_twiddle.py ├── sdft.py ├── read_vcd.py └── Verilog_VCD.py ├── Makefile └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | *.swp 3 | *.pyc 4 | *.list 5 | -------------------------------------------------------------------------------- /docs/sdft.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattvenn/fpga-sdft/HEAD/docs/sdft.png -------------------------------------------------------------------------------- /docs/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattvenn/fpga-sdft/HEAD/docs/overview.png -------------------------------------------------------------------------------- /docs/fft_vs_sdft.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattvenn/fpga-sdft/HEAD/docs/fft_vs_sdft.png -------------------------------------------------------------------------------- /tests/twiddle_rom_tb.v: -------------------------------------------------------------------------------- 1 | `default_nettype none 2 | module test; 3 | 4 | reg reset = 0; 5 | reg [3:0] addr = 0; 6 | 7 | initial begin 8 | $dumpfile("test.vcd"); 9 | $dumpvars(0,test); 10 | # 32 11 | $finish; 12 | end 13 | 14 | // clock 15 | reg clk = 0; 16 | always #1 clk = !clk; 17 | always #1 addr <= addr + 1; 18 | 19 | twiddle_rom dut(.clk (clk), .addr(addr)); 20 | 21 | endmodule // test 22 | -------------------------------------------------------------------------------- /hdl/icestick.pcf: -------------------------------------------------------------------------------- 1 | set_io --warn-no-port LED 95 # centre green 2 | set_io --warn-no-port clk 21 3 | 4 | 5 | set_io --warn-no-port adc[0] 44 6 | set_io --warn-no-port adc[1] 45 7 | set_io --warn-no-port adc[2] 47 8 | set_io --warn-no-port adc[3] 48 9 | 10 | 11 | set_io --warn-no-port vga_r 78 12 | set_io --warn-no-port vga_g 79 13 | set_io --warn-no-port vga_b 80 14 | set_io --warn-no-port hsync 87 15 | set_io --warn-no-port vsync 88 16 | 17 | -------------------------------------------------------------------------------- /hdl/complex_mult.v: -------------------------------------------------------------------------------- 1 | `default_nettype none 2 | module complex_mult 3 | #( 4 | parameter data_in_w = 8, 5 | parameter data_out_w = 16 6 | ) 7 | ( 8 | input wire signed [data_in_w-1:0] a_real, 9 | input wire signed [data_in_w-1:0] a_imag, 10 | input wire signed [data_in_w-1:0] b_real, 11 | input wire signed [data_in_w-1:0] b_imag, 12 | 13 | output wire signed [data_out_w-1:0] out_real, 14 | output wire signed [data_out_w-1:0] out_imag 15 | ); 16 | 17 | assign out_real = a_real * b_real - a_imag * b_imag; 18 | assign out_imag = a_real * b_imag + a_imag * b_real; 19 | 20 | endmodule 21 | 22 | -------------------------------------------------------------------------------- /python/parse_verilog_header.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | class ParseParams(): 4 | 5 | def __init__(self, filename): 6 | self.filename = filename 7 | self.params = {} 8 | 9 | def parse(self): 10 | with open(self.filename) as fh: 11 | for line in fh.readlines(): 12 | m = re.search('^localparam (\w+) = (\d+);', line) 13 | if m is not None: 14 | self.params[m.group(1)] = int(m.group(2)) 15 | return self.params 16 | 17 | 18 | 19 | if __name__ == '__main__': 20 | pp = ParseParams('tests/localparams.vh') 21 | params = pp.parse() 22 | from pprint import pprint 23 | pprint(params) 24 | -------------------------------------------------------------------------------- /tests/top_tb_header.vh: -------------------------------------------------------------------------------- 1 | `timescale 1ps / 1ps 2 | 3 | `ifdef __ICARUS__ 4 | `define finish_and_return(code) $finish_and_return(code) 5 | `else 6 | `define finish_and_return(code) $finish 7 | `endif 8 | 9 | `define assert(msg, signal, value) \ 10 | if ((signal) !== value) begin \ 11 | $display("%d ERROR (%m): %s. signal != value", $time, msg); \ 12 | $display(" actual: %x", signal); \ 13 | $display(" expected: %x", value); \ 14 | end 15 | 16 | `define assert_true(msg, signal) \ 17 | if (!(signal)) begin \ 18 | $display("%d ERROR (%m): %s. (signal) == FALSE", $time, msg); \ 19 | end 20 | 21 | -------------------------------------------------------------------------------- /tests/gtk-twiddle_rom.gtkw: -------------------------------------------------------------------------------- 1 | [*] 2 | [*] GTKWave Analyzer v3.3.66 (w)1999-2015 BSI 3 | [*] Tue Jun 5 15:25:58 2018 4 | [*] 5 | [dumpfile] "/home/matt/work/fpga/fft/build/twiddle_rom.vcd" 6 | [dumpfile_mtime] "Tue Jun 5 15:25:38 2018" 7 | [dumpfile_size] 705 8 | [savefile] "/home/matt/work/fpga/fft/tests/gtk-twiddle_rom.gtkw" 9 | [timestart] 0 10 | [size] 1758 1127 11 | [pos] -1 -1 12 | *-3.386754 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 13 | [treeopen] test. 14 | [sst_width] 253 15 | [signals_width] 195 16 | [sst_expanded] 1 17 | [sst_vpaned_height] 343 18 | @28 19 | test.dut.clk 20 | @22 21 | test.dut.addr[3:0] 22 | test.dut.dout_imaj[15:0] 23 | @23 24 | test.dut.dout_real[15:0] 25 | [pattern_trace] 1 26 | [pattern_trace] 0 27 | -------------------------------------------------------------------------------- /tests/gtk-complex_mult.gtkw: -------------------------------------------------------------------------------- 1 | [*] 2 | [*] GTKWave Analyzer v3.3.66 (w)1999-2015 BSI 3 | [*] Mon Jun 11 15:18:02 2018 4 | [*] 5 | [dumpfile] "/home/matt/work/fpga/fft/build/complex_mult.vcd" 6 | [dumpfile_mtime] "Mon Jun 11 15:17:11 2018" 7 | [dumpfile_size] 1081 8 | [savefile] "/home/matt/work/fpga/fft/tests/gtk-complex_mult.gtkw" 9 | [timestart] 0 10 | [size] 1922 991 11 | [pos] -1 -1 12 | *-1.927323 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 13 | [sst_width] 253 14 | [signals_width] 184 15 | [sst_expanded] 1 16 | [sst_vpaned_height] 296 17 | @28 18 | test.clk 19 | @200 20 | - 21 | @420 22 | test.a_real[7:0] 23 | test.a_imag[7:0] 24 | @200 25 | - 26 | @420 27 | test.b_real[7:0] 28 | test.b_imag[7:0] 29 | @200 30 | - 31 | @421 32 | test.out_imag[15:0] 33 | test.out_real[15:0] 34 | [pattern_trace] 1 35 | [pattern_trace] 0 36 | -------------------------------------------------------------------------------- /tests/gtk-freq_bram.gtkw: -------------------------------------------------------------------------------- 1 | [*] 2 | [*] GTKWave Analyzer v3.3.66 (w)1999-2015 BSI 3 | [*] Wed Jun 13 18:01:04 2018 4 | [*] 5 | [dumpfile] "/home/matt/work/fpga/fft/build/freq_bram.vcd" 6 | [dumpfile_mtime] "Wed Jun 13 18:00:47 2018" 7 | [dumpfile_size] 47367 8 | [savefile] "/home/matt/work/fpga/fft/tests/gtk-freq_bram.gtkw" 9 | [timestart] 0 10 | [size] 2018 1021 11 | [pos] -1 -1 12 | *-8.000000 37 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 13 | [treeopen] test. 14 | [sst_width] 253 15 | [signals_width] 376 16 | [sst_expanded] 1 17 | [sst_vpaned_height] 306 18 | @22 19 | test.freq_bram_0.d_in[19:0] 20 | @24 21 | test.freq_bram_0.r_addr[6:0] 22 | @22 23 | test.freq_bram_0.w_addr[6:0] 24 | @28 25 | test.freq_bram_0.r_en 26 | test.freq_bram_0.w_en 27 | @23 28 | test.freq_bram_0.d_out[19:0] 29 | [pattern_trace] 1 30 | [pattern_trace] 0 31 | -------------------------------------------------------------------------------- /hdl/twiddle_rom.v: -------------------------------------------------------------------------------- 1 | `default_nettype none 2 | module twiddle_rom 3 | #( 4 | parameter FILE_REAL = "hdl/twiddle_real.list", 5 | parameter FILE_IMAJ = "hdl/twiddle_imag.list", 6 | parameter addr_w = 7, 7 | parameter data_w = 8 8 | ) 9 | ( 10 | input wire clk, 11 | input wire [addr_w-1:0] addr, 12 | output reg [data_w-1:0] dout_real, 13 | output reg [data_w-1:0] dout_imag 14 | ); 15 | 16 | reg [data_w-1:0] rom_real [(1 << addr_w)-1:0]; 17 | reg [data_w-1:0] rom_imag [(1 << addr_w)-1:0]; 18 | 19 | initial begin 20 | if (FILE_REAL) $readmemh(FILE_REAL, rom_real); 21 | if (FILE_IMAJ) $readmemh(FILE_IMAJ, rom_imag); 22 | end 23 | 24 | always @(posedge clk) begin 25 | dout_real = rom_real[addr]; 26 | dout_imag = rom_imag[addr]; 27 | end 28 | 29 | endmodule 30 | -------------------------------------------------------------------------------- /tests/localparams.vh: -------------------------------------------------------------------------------- 1 | // fft settings 2 | localparam freq_bins = 32; 3 | localparam bin_addr_w = $clog2(freq_bins); 4 | localparam data_width = 8; 5 | localparam freq_data_w = 20; // to prevent overflow with multiplies and adds 6 | 7 | // test settings 8 | localparam sample_low = 20; 9 | localparam sample_high = 150; 10 | 11 | // actually way too fast, results in bram being updated many times per video frame 12 | localparam FFT_READ_CYCLES = 100; // MAX 127! every X cycles read the next freq/imag bin into BRAM 13 | 14 | // fsm values 15 | localparam STATE_WAIT_FFT = 0; 16 | localparam STATE_WAIT_START = 1; 17 | localparam STATE_PROCESS = 2; 18 | localparam STATE_READ = 3; 19 | localparam STATE_WRITE_BRAM = 4; 20 | 21 | // screen settings 22 | localparam screen_height = 480; 23 | 24 | // bar settings 25 | localparam bar_height = screen_height / freq_bins; 26 | localparam bar_height_counter_w = $clog2(bar_height); 27 | -------------------------------------------------------------------------------- /hdl/freq_bram.v: -------------------------------------------------------------------------------- 1 | `default_nettype none 2 | module freq_bram 3 | #( 4 | parameter addr_w = 7, 5 | parameter data_w = 8, 6 | parameter FILE = "hdl/freq_bram.list" 7 | ) 8 | ( 9 | input wire r_clk, 10 | input wire w_clk, 11 | input wire [addr_w-1:0] r_addr, 12 | input wire [addr_w-1:0] w_addr, 13 | input wire w_en, 14 | input wire r_en, 15 | input wire [data_w-1:0] d_in, 16 | 17 | output reg [data_w-1:0] d_out 18 | ); 19 | 20 | reg [data_w-1:0] ram [(1 << addr_w)-1:0]; 21 | 22 | initial begin 23 | if (FILE) $readmemh(FILE, ram); 24 | d_out <= 0; 25 | end 26 | 27 | always @(posedge w_clk) begin 28 | if(w_en) 29 | ram[w_addr] <= d_in; 30 | end 31 | 32 | always @(posedge r_clk) begin 33 | if(r_en) 34 | d_out <= ram[r_addr]; 35 | end 36 | 37 | endmodule 38 | 39 | -------------------------------------------------------------------------------- /python/multiply_complex.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from cmath import cos, sin, pi 3 | from scipy import signal 4 | import numpy as np 5 | 6 | size = 5 7 | rotations = range(5) 8 | 9 | N = size * size 10 | coeffs = [] 11 | freqs = [] 12 | in_s = [] 13 | sig_counter = 0 14 | 15 | def init_coeffs(): 16 | for i in range(N): 17 | a = 2.0 * pi * i / N 18 | coeff = complex(cos(a),sin(a)) 19 | coeffs.append(coeff) 20 | print(coeff) 21 | 22 | init_coeffs() 23 | import matplotlib.pyplot as plt 24 | 25 | fig, ax = plt.subplots(nrows=size, ncols=size) #subplot_kw=dict(projection='polar')) 26 | plot_num = 0 27 | for row in ax: 28 | for col in row: 29 | points = [coeffs[plot_num]] 30 | for r in rotations: 31 | points.append(points[r] * coeffs[plot_num]) 32 | reals = [p.real for p in points] 33 | imags = [p.imag for p in points] 34 | col.plot(reals, imags, marker='o', markersize=3) 35 | col.set_ylim([-1.2,1.2]) 36 | col.set_xlim([-1.2,1.2]) 37 | plot_num += 1 38 | 39 | plt.show() 40 | -------------------------------------------------------------------------------- /tests/complex_mult_tb.v: -------------------------------------------------------------------------------- 1 | `default_nettype none 2 | `include "tests/top_tb_header.vh" 3 | module test; 4 | 5 | reg reset = 0; 6 | wire signed [15:0] out_real; 7 | wire signed [15:0] out_imag; 8 | 9 | integer i; 10 | initial begin 11 | $dumpfile("test.vcd"); 12 | $dumpvars(0,test); 13 | b_real = 2; 14 | b_imag = -1; 15 | a_real = 3; 16 | a_imag = 1; 17 | `assert("1", out_real, 16'sd7); 18 | `assert("1", out_imag, -16'sd1); 19 | # 4 20 | b_real = -2; 21 | b_imag = -2; 22 | a_real = -3; 23 | a_imag = -3; 24 | `assert("2", out_real, 16'sd0); 25 | `assert("2", out_imag, 16'sd12); 26 | # 4 27 | b_real = 2; 28 | b_imag = 4; 29 | a_real = 3; 30 | a_imag = 4; 31 | `assert("3", out_real, -16'sd10); 32 | `assert("3", out_imag, 16'sd20); 33 | # 4 34 | 35 | $finish; 36 | end 37 | 38 | // clock 39 | reg clk = 0; 40 | always #1 clk = !clk; 41 | 42 | reg signed [7:0] b_real; 43 | reg signed [7:0] b_imag; 44 | reg signed [7:0] a_real; 45 | reg signed [7:0] a_imag; 46 | 47 | complex_mult complex_mult_dut(.a_real(a_real), .a_imag(a_imag), .b_real(b_real), .b_imag(b_imag), .out_real(out_real), .out_imag(out_imag)); 48 | 49 | 50 | endmodule // test 51 | 52 | -------------------------------------------------------------------------------- /tests/gtk-sdft.gtkw: -------------------------------------------------------------------------------- 1 | [*] 2 | [*] GTKWave Analyzer v3.3.66 (w)1999-2015 BSI 3 | [*] Wed Jun 13 17:02:42 2018 4 | [*] 5 | [dumpfile] "(null)" 6 | [savefile] "/home/matt/work/fpga/fft/tests/gtk-sdft.gtkw" 7 | [timestart] 678 8 | [size] 1515 876 9 | [pos] 3066 1453 10 | *-7.000000 799 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 11 | [treeopen] test. 12 | [treeopen] test.dut. 13 | [sst_width] 352 14 | [signals_width] 418 15 | [sst_expanded] 1 16 | [sst_vpaned_height] 429 17 | @28 18 | test.clk 19 | test.reset 20 | @420 21 | test.sample[7:0] 22 | @28 23 | test.start 24 | test.ready 25 | test.dut.read 26 | @200 27 | - 28 | @25 29 | test.dut.cycles[15:0] 30 | @22 31 | test.dut.state[3:0] 32 | @420 33 | test.dut.sample[7:0] 34 | test.dut.delta[8:0] 35 | @200 36 | - 37 | @420 38 | test.dut.\frequency_bins_real[0][19:0] 39 | test.dut.\frequency_bins_imag[0][19:0] 40 | test.dut.\frequency_bins_real[1][19:0] 41 | test.dut.\frequency_bins_imag[1][19:0] 42 | test.dut.\frequency_bins_imag[2][19:0] 43 | test.dut.\frequency_bins_real[2][19:0] 44 | test.dut.\frequency_bins_real[3][19:0] 45 | test.dut.\frequency_bins_imag[3][19:0] 46 | test.dut.\frequency_bins_real[8][19:0] 47 | test.dut.\frequency_bins_imag[8][19:0] 48 | @200 49 | - 50 | - 51 | @420 52 | test.dut.twid_real[7:0] 53 | test.dut.twid_imag[7:0] 54 | @200 55 | - 56 | - 57 | - 58 | - 59 | [pattern_trace] 1 60 | [pattern_trace] 0 61 | -------------------------------------------------------------------------------- /hdl/8k.pcf: -------------------------------------------------------------------------------- 1 | ################################### 2 | # Physical constraints file (pcf) # 3 | # for iCE40HX8K-CT256 # 4 | ################################### 5 | 6 | 7 | ### Clock 8 | set_io clk J3 9 | 10 | ### LEDs 11 | set_io --warn-no-port LED[0] B5 12 | set_io --warn-no-port LED[1] B4 13 | set_io --warn-no-port LED[2] A2 14 | set_io --warn-no-port LED[3] A1 15 | set_io --warn-no-port LED[4] C5 16 | set_io --warn-no-port LED[5] C4 17 | set_io --warn-no-port LED[6] B3 18 | set_io --warn-no-port LED[7] C3 19 | 20 | set_io --warn-no-port hsync P1 21 | set_io --warn-no-port vsync N3 22 | set_io --warn-no-port vga_r M2 23 | set_io --warn-no-port vga_g L3 24 | set_io --warn-no-port vga_b K3 25 | 26 | ### ADC 27 | set_io --warn-no-port adc[7] B16 28 | set_io --warn-no-port adc[6] C16 29 | set_io --warn-no-port adc[5] D14 30 | set_io --warn-no-port adc[4] D16 31 | set_io --warn-no-port adc[3] D15 32 | set_io --warn-no-port adc[2] E16 33 | set_io --warn-no-port adc[1] E14 34 | set_io --warn-no-port adc[0] F16 35 | 36 | set_io --warn-no-port adc_clk J15 37 | set_io --warn-no-port adc_shutdown H14 38 | 39 | ### GPIO 40 | set_io --warn-no-port gpio[0] G16 41 | set_io --warn-no-port gpio[1] F15 42 | ### UART (FTDI Channel B) 43 | #set_io RS232_RX_i B10 44 | #set_io RS232_TX_o B12 45 | #set_io RTSn B13 46 | #set_io CTSn A15 47 | #set_io DTRn A16 48 | #set_io DSRn B14 49 | #set_io DCDn B15 50 | 51 | -------------------------------------------------------------------------------- /python/gen_twiddle.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sys 3 | from parse_verilog_header import ParseParams 4 | import math 5 | 6 | if len(sys.argv) != 2: 7 | exit("give params file as first arg") 8 | 9 | params = ParseParams(sys.argv[1]).parse() 10 | N = params['freq_bins'] 11 | width = params['data_width'] 12 | 13 | max_val = (2 ** width - 1)/2 14 | 15 | 16 | def to_bytes(n, length, endianess='big'): 17 | h = '%x' % n 18 | s = ('0'*(len(h) % 2) + h).zfill(length*2).decode('hex') 19 | return s if endianess == 'big' else s[::-1] 20 | 21 | def hex2(n): 22 | return hex (n & 0xffffffff)[:-1] 23 | 24 | def hex3(n): 25 | return "0x%s"%("00000000%x"%(n&0xffffffff))[-8:] 26 | 27 | def gen_twiddle(): 28 | real_fh = open("twiddle_real.list", 'w') 29 | imag_fh = open("twiddle_imag.list", 'w') 30 | coeffs = [] 31 | for i in range(int(N)): 32 | cos_v = (max_val * math.cos(2 * math.pi * i / N)) 33 | sin_v = (max_val * math.sin(2 * math.pi * i / N)) 34 | coeffs.append(complex(cos_v, sin_v)) 35 | print("%7.2f %7.2f -> %s %s" % (cos_v, sin_v, hex3(int(cos_v)), hex3(int(sin_v)))) 36 | real_fh.write(hex3(int(cos_v)) + "\n") 37 | imag_fh.write(hex3(int(sin_v)) + "\n") 38 | return coeffs 39 | 40 | def gen_freq_bram(): 41 | bram_fh = open("freq_bram.list", 'w') 42 | for i in range(int(N)): 43 | bram_fh.write(hex3(i) + "\n") 44 | 45 | if __name__ == '__main__': 46 | print("N: %d, width: %d, max (signed) %d" % (N, width, max_val)) 47 | gen_twiddle() 48 | gen_freq_bram() 49 | 50 | -------------------------------------------------------------------------------- /tests/top_tb.v: -------------------------------------------------------------------------------- 1 | `default_nettype none 2 | `include "tests/top_tb_header.vh" 3 | module test; 4 | `include "tests/localparams.vh" 5 | 6 | 7 | integer i, j; 8 | 9 | reg [data_width-1:0] sample = 0; 10 | 11 | wire [data_width-1:0] d_out; 12 | reg [data_width-1:0] d_in = 0; 13 | reg w_en = 0; 14 | reg r_en = 0; 15 | 16 | 17 | initial begin 18 | $dumpfile("test.vcd"); 19 | $dumpvars(0,test); 20 | for (i = 0 ; i < freq_bins ; i = i + 1) begin 21 | $dumpvars(1, top_0.freq_bram_0.ram[i]); 22 | $dumpvars(2, top_0.sdft_0.frequency_bins_real[i]); 23 | $dumpvars(3, top_0.sdft_0.frequency_bins_imag[i]); 24 | end 25 | 26 | while(top_0.read_cycles < freq_bins * 2) begin 27 | for (i = 0; i < 2; i = i + 1) begin 28 | for (j = 0; j < 3; j = j + 1) begin 29 | sample <= sample_low; 30 | wait(top_0.state == STATE_WAIT_START); 31 | wait(top_0.state == STATE_PROCESS); 32 | end 33 | for (j = 0; j < 3; j = j + 1) begin 34 | sample <= sample_high; 35 | wait(top_0.state == STATE_WAIT_START); 36 | wait(top_0.state == STATE_PROCESS); 37 | end 38 | end 39 | end 40 | $display("fft cycles: %d", top_0.sdft_0.cycles); 41 | 42 | $finish; 43 | end 44 | 45 | // clock 46 | reg clk = 0; 47 | always #1 clk = !clk; 48 | 49 | top top_0(.clk(clk), .adc(sample)); 50 | 51 | endmodule 52 | 53 | -------------------------------------------------------------------------------- /tests/sdft_tb.v: -------------------------------------------------------------------------------- 1 | `default_nettype none 2 | module test; 3 | 4 | `include "tests/localparams.vh" 5 | 6 | reg reset = 0; 7 | reg signed [data_width-1:0] sample = 0; 8 | reg start = 0; 9 | reg read = 0; 10 | reg [bin_addr_w-1:0] bin_addr = 0; 11 | 12 | wire ready; 13 | wire [freq_data_w-1:0] out_imag; 14 | wire [freq_data_w-1:0] out_real; 15 | 16 | integer i, j; 17 | initial begin 18 | $dumpfile("test.vcd"); 19 | $dumpvars(0,test); 20 | for (i = 0 ; i < freq_bins ; i = i + 1) begin 21 | $dumpvars(1, dut.samples[i]); 22 | $dumpvars(2, dut.frequency_bins_real[i]); 23 | $dumpvars(3, dut.frequency_bins_imag[i]); 24 | end 25 | 26 | while(dut.cycles < 352) begin 27 | for (i = 0; i < 2; i = i + 1) begin 28 | for (j = 0; j < 3; j = j + 1) begin 29 | $display("cycle: %d %d", j, sample); 30 | wait(ready == 1); 31 | sample <= sample_low; 32 | start <= 1; 33 | wait(ready == 0); 34 | start <= 0; 35 | end 36 | for (j = 0; j < 3; j = j + 1) begin 37 | $display("cycle: %d %d", j, sample); 38 | wait(ready == 1); 39 | sample <= sample_high; 40 | start <= 1; 41 | wait(ready == 0); 42 | start <= 0; 43 | end 44 | end 45 | end 46 | 47 | $display("fft cycles: %d", dut.cycles); 48 | 49 | // read some values 50 | bin_addr <= 0; 51 | read <= 1; 52 | wait(ready == 0); 53 | read <= 0; 54 | # 4 55 | bin_addr <= 1; 56 | read <= 1; 57 | wait(ready == 0); 58 | read <= 0; 59 | # 4 60 | 61 | $finish; 62 | end 63 | 64 | // clock 65 | reg clk = 0; 66 | always #1 clk = !clk; 67 | 68 | sdft #( .data_width(data_width), .freq_bins(freq_bins), .freq_w(freq_data_w)) dut(.clk (clk), .sample(sample), .start(start), .ready(ready), .bin_addr(bin_addr), .read(read), .bin_out_imag(out_imag), .bin_out_real(out_real)); 69 | 70 | 71 | endmodule // test 72 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PACKAGE = ct256 2 | DEVICE = hx8k 3 | SRC_DIR = hdl 4 | TEST_DIR = tests 5 | DOCS_DIR = docs 6 | BUILD_DIR = build 7 | PROJ = $(BUILD_DIR)/fft 8 | PIN_DEF = $(SRC_DIR)/8k.pcf 9 | SHELL := /bin/bash # Use bash syntax 10 | ICESTORM_DIR = ~/.apio/packages/toolchain-icestorm/bin/ 11 | ICESTORM_DIR = /usr/bin/ 12 | 13 | MODULES = sdft.v VgaSyncGen.v twiddle_rom.v freq_bram.v # complex_mult.v 14 | LIST = twiddle_imag.list twiddle_real.list freq_bram.list 15 | VERILOG = top.v $(MODULES) 16 | SRC = $(addprefix $(SRC_DIR)/, $(VERILOG)) 17 | LISTS = $(addprefix $(SRC_DIR)/, $(LIST)) 18 | 19 | all: $(PROJ).bin $(PROJ).rpt 20 | 21 | # fft configuration in localparams.vh 22 | PARAMS = $(TEST_DIR)/localparams.vh 23 | 24 | # $@ The file name of the target of the rule.rule 25 | # $< first pre requisite 26 | # $^ names of all preerquisites 27 | 28 | # rules for building the blif file 29 | $(BUILD_DIR)/%.blif: $(SRC) 30 | $(ICESTORM_DIR)/yosys -p "synth_ice40 -top top -blif $@" $^ | tee $(BUILD_DIR)/build.log 31 | 32 | # asc 33 | $(BUILD_DIR)/%.asc: $(PIN_DEF) $(BUILD_DIR)/%.blif 34 | arachne-pnr --device 8k --package $(PACKAGE) -p $^ -o $@ 35 | #arachne-pnr -d $(subst hx,,$(subst lp,,$(DEVICE))) -o $@ -p $^ 36 | 37 | # bin, for programming 38 | $(BUILD_DIR)/%.bin: $(BUILD_DIR)/%.asc 39 | icepack $< $@ 40 | 41 | # timing 42 | $(BUILD_DIR)/%.rpt: $(BUILD_DIR)/%.asc 43 | icetime -d $(DEVICE) -mtr $@ $< 44 | 45 | # rules for simple tests with one verilog module per test bench 46 | $(BUILD_DIR)/%.out: $(TEST_DIR)/%_tb.v $(SRC) list 47 | iverilog -o $(basename $@).out $^ 48 | 49 | $(BUILD_DIR)/%.vcd: $(BUILD_DIR)/%.out 50 | vvp $< # -fst 51 | mv test.vcd $@ 52 | 53 | prog: $(PROJ).bin 54 | iceprog $< 55 | 56 | list: 57 | cd hdl; ../python/gen_twiddle.py ../$(PARAMS) 58 | 59 | model-sdft: 60 | cd python; python3 sdft.py 61 | 62 | debug-%: $(BUILD_DIR)/%.vcd $(TEST_DIR)/gtk-%.gtkw $(PARAMS) list 63 | gtkwave $^ 64 | 65 | read-sdft-vcd: 66 | cd python; python3 read_vcd.py ../build/sdft.vcd ../$(PARAMS) 67 | 68 | read-top-vcd: 69 | cd python; python3 read_vcd.py ../build/top.vcd ../$(PARAMS) 70 | 71 | show-%: $(SRC_DIR)/%.v 72 | yosys -p "read_verilog $<; proc; opt; show -colors 2 -width -signed" 73 | 74 | clean: 75 | rm -f $(BUILD_DIR)/* 76 | # rm -f $(SRC_DIR)/*list 77 | 78 | #secondary needed or make will remove useful intermediate files 79 | .SECONDARY: 80 | .PHONY: all prog clean 81 | 82 | -------------------------------------------------------------------------------- /tests/freq_bram_tb.v: -------------------------------------------------------------------------------- 1 | `default_nettype none 2 | `include "tests/top_tb_header.vh" 3 | module test; 4 | 5 | localparam data_w = 20; 6 | localparam addr_w = 7; 7 | localparam num_tests = 2 ** addr_w; 8 | integer i; 9 | 10 | wire [data_w-1:0] d_out; 11 | reg [data_w-1:0] d_in = 0; 12 | reg w_en = 0; 13 | reg r_en = 0; 14 | reg [addr_w-1:0] r_addr = 0; 15 | reg [addr_w-1:0] w_addr = 0; 16 | 17 | 18 | initial begin 19 | $dumpfile("test.vcd"); 20 | $dumpvars(0,test); 21 | // read the data 22 | for(i = 0; i < num_tests; i = i + 1) begin 23 | r_addr <= i; 24 | r_en <= 1'b1; 25 | # 2; 26 | `assert("read FILE data", d_out, i); 27 | r_en <= 1'b0; 28 | # 2; 29 | end 30 | // write the data 31 | for(i = 0; i < num_tests; i = i + 1) begin 32 | w_addr <= i; 33 | d_in <= i; 34 | w_en <= 1'b1; 35 | # 2; 36 | w_en <= 1'b0; 37 | # 2; 38 | end 39 | // read the data 40 | for(i = 0; i < num_tests; i = i + 1) begin 41 | r_addr <= i; 42 | r_en <= 1'b1; 43 | # 2; 44 | `assert("out data", d_out, i); 45 | r_en <= 1'b0; 46 | # 2; 47 | end 48 | // read the data while writing from the other end, check the read data is still good 49 | for(i = 0; i < num_tests; i = i + 2) begin 50 | w_addr <= i + 1; 51 | r_addr <= i; 52 | d_in <= num_tests - i; 53 | w_en <= 1'b1; 54 | r_en <= 1'b1; 55 | # 2; 56 | `assert("read while write data", d_out, i); 57 | r_en <= 1'b0; 58 | w_en <= 1'b0; 59 | # 2; 60 | end 61 | // read the newly written data 62 | for(i = 0; i < num_tests; i = i + 2) begin 63 | r_addr <= i + 1; 64 | r_en <= 1'b1; 65 | # 2; 66 | `assert("read new data", d_out, num_tests - i); 67 | r_en <= 1'b0; 68 | # 2; 69 | end 70 | $finish; 71 | end 72 | 73 | // clock 74 | reg clk = 0; 75 | always #1 clk = !clk; 76 | 77 | freq_bram #(.addr_w(addr_w), .data_w(data_w)) freq_bram_0(.w_clk(clk), .r_clk(clk), .w_en(w_en), .r_en(r_en), .d_in(d_in), .d_out(d_out), .r_addr(r_addr), .w_addr(w_addr)); 78 | 79 | endmodule 80 | -------------------------------------------------------------------------------- /python/sdft.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | # https://stackoverflow.com/questions/6663222/doing-fft-in-realtime 3 | from cmath import cos, sin, pi 4 | from scipy import signal 5 | import numpy as np 6 | 7 | # sample history needs to be the same as the number of frequency bins 8 | N = 16 9 | samp_hist = N 10 | 11 | coeffs = [] 12 | freqs = [] 13 | in_s = [] 14 | sig_counter = 0 15 | 16 | 17 | def init_coeffs(): 18 | for i in range(N): 19 | a = 2.0 * pi * i / N 20 | coeff = complex(cos(a),sin(a)) 21 | coeffs.append(coeff) 22 | print(coeff) 23 | 24 | 25 | def sdft(delta): 26 | for i in range(N): 27 | freqs[i] = (freqs[i] + delta) * coeffs[i] 28 | 29 | 30 | # initialise 31 | init_coeffs() 32 | t = np.linspace(0, 1, samp_hist, endpoint=False) 33 | sig_in = signal.square(pi * 2 * t) 34 | #sig_in = np.sin(pi * 2 * t) 35 | 36 | for i in range(N): 37 | freqs.append(complex(0,0)) 38 | for i in range(samp_hist): 39 | in_s.append(complex(0,0)) 40 | 41 | 42 | # run the loop 43 | freq_hist = [] 44 | for i in range(samp_hist*2): 45 | freq_hist.append(list(freqs)) 46 | # rotate in new sample 47 | last = in_s[samp_hist-1] 48 | for i in range(samp_hist-1, 0, -1): 49 | in_s[i] = in_s[i-1] 50 | in_s[0] = complex(sig_in[sig_counter % samp_hist],0) 51 | 52 | sig_counter += 1 53 | 54 | 55 | # run the sdft 56 | delta = in_s[0] - last 57 | sdft(delta) 58 | 59 | """ 60 | print("dumping frequency history:") 61 | for f in range(N): 62 | print("%2d : " % f, end='') 63 | for i in range(32): 64 | print("(%4.1f,%4.1f)" % (freq_hist[i][f].real, freq_hist[i][f].imag), end='') 65 | print() 66 | """ 67 | # plot the results and compare with numpy's fft 68 | import matplotlib.pyplot as plt 69 | fig = plt.figure() 70 | ax = fig.add_subplot(2,2,3) 71 | plot_freqs = [] 72 | for i in range(N): 73 | plot_freqs.append(abs(freqs[i])) 74 | 75 | ax.plot(range(N), plot_freqs) 76 | ax.set_title("sliding dft") 77 | 78 | ax = fig.add_subplot(2,2,4) 79 | ax.plot(range(samp_hist), abs(np.fft.fft(sig_in[0:samp_hist]))) 80 | ax.set_title("numpy fft") 81 | 82 | ax = fig.add_subplot(2,2,1) 83 | ax.plot(range(samp_hist), sig_in[0:samp_hist]) 84 | ax.set_title("input signal") 85 | 86 | ax = fig.add_subplot(2,2,2) 87 | coeff_r = [] 88 | coeff_i = [] 89 | 90 | for i in range(N): 91 | coeff_r.append( coeffs[i].real) 92 | coeff_i.append( coeffs[i].imag) 93 | ax.plot(coeff_r, coeff_i) 94 | ax.set_title("coeffs/twiddles") 95 | 96 | plt.show() 97 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FFT on an FPGA 2 | 3 | having a go at some DSP on an FPGA. I'm basing the design on this paper: 4 | 5 | http://www.comm.toronto.edu/~dimitris/ece431/slidingdft.pdf 6 | 7 | This is a sliding discrete Fourier transform. It requires two real adds and one complex multiply per frequency bin. The transform is run for every new sample taken. 8 | 9 | ![overview](docs/sdft.png) 10 | 11 | ![sdft vs fft](docs/fft_vs_sdft.png) 12 | 13 | # Makefile 14 | 15 | * make list - compute the twiddle factor tables 16 | * make show-sdft - use yosys show to see how the design is inferred 17 | * make debug-sdft - use iverilog, vvp and gtkwave to show the results of the testbench 18 | * make read-sdft-vcd - after vcd generated, use Python script to read the vcd and plot the last set of results computed 19 | * make model-sdft - model sdft in python and compare against numpy's fft 20 | * make - build everything 21 | * make prog - program the icestick 22 | 23 | # Done 24 | 25 | * Read the resources 26 | * Implement HDL twiddle factor ROM 27 | * Implement an [SDFT in Python](python/sdft.py) using the same pattern 28 | * Why doesn't makefile build from scratch? - missing a file 29 | * Why do freq bin regs overflow almost immediately in the testbench? scaling 30 | * How to do scaling - the twiddle factors are scaled to fill the hole register so things overflow quickly, divide by 127 31 | * Why doesn't pnr work? - it does, but the module wasn't being used so yosys was removing it 32 | * make the python tools parse test/localparams.vh - very basic but works OK 33 | * adc connected, pinout is correct. sampleing has to be done on clock negedge 34 | 35 | # Todo 36 | 37 | * how to deal with timing analysis being lower than the clock I'm using 38 | 39 | # FPGA resources used 40 | 41 | Using an 8k device: 42 | 43 | IOs 18 / 206 44 | GBs 0 / 8 45 | GB_IOs 0 / 8 46 | LCs 3619 / 7680 47 | DFF 120 48 | CARRY 512 49 | CARRY, DFF 39 50 | DFF PASS 86 51 | CARRY PASS 165 52 | BRAMs 13 / 32 53 | WARMBOOTs 0 / 1 54 | PLLs 1 / 2 55 | 56 | # Resources 57 | 58 | * great video that explains what the Fourier transform is: https://www.youtube.com/watch?v=spUNpyF58BY 59 | * paper on implementing an FFT on an FPGA http://web.mit.edu/6.111/www/f2017/handouts/FFTtutorial121102.pdf 60 | * using Python to implement FFT: https://jakevdp.github.io/blog/2013/08/28/understanding-the-fft/ 61 | * sliding FFT https://www.dsprelated.com/showarticle/776.php 62 | * stackoverflow answer about sdft: https://stackoverflow.com/questions/6663222/doing-fft-in-realtime 63 | * paper on SDFT: http://www.comm.toronto.edu/~dimitris/ece431/slidingdft.pdf 64 | * http://www.analog.com/media/en/technical-documentation/data-sheets/AD9283.pdf 65 | 66 | -------------------------------------------------------------------------------- /tests/gtk-top.gtkw: -------------------------------------------------------------------------------- 1 | [*] 2 | [*] GTKWave Analyzer v3.3.66 (w)1999-2015 BSI 3 | [*] Thu Jun 14 16:26:05 2018 4 | [*] 5 | [dumpfile] "/home/matt/work/fpga/fft/build/top.vcd" 6 | [dumpfile_mtime] "Thu Jun 14 16:25:31 2018" 7 | [dumpfile_size] 768551 8 | [savefile] "/home/matt/work/fpga/fft/tests/gtk-top.gtkw" 9 | [timestart] 0 10 | [size] 2512 1541 11 | [pos] 523 328 12 | *-11.993412 14940 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 13 | [treeopen] test. 14 | [treeopen] test.top_0. 15 | [treeopen] test.top_0.sdft_0. 16 | [sst_width] 395 17 | [signals_width] 536 18 | [sst_expanded] 1 19 | [sst_vpaned_height] 486 20 | @28 21 | test.top_0.clk 22 | test.top_0.px_clk 23 | test.top_0.activevideo 24 | test.top_0.hsync 25 | test.top_0.vsync 26 | @24 27 | test.top_0.x_px[9:0] 28 | test.top_0.y_px[9:0] 29 | @28 30 | test.top_0.vga_b 31 | test.top_0.draw_bar 32 | test.top_0.start_of_line 33 | test.top_0.start_of_screen 34 | @200 35 | - 36 | @23 37 | test.top_0.update_counter[6:0] 38 | @24 39 | test.top_0.sdft_0.sample[7:0] 40 | @28 41 | test.top_0.freq_bram_w 42 | @24 43 | test.top_0.freq_bram_r 44 | [color] 5 45 | test.top_0.freq_bram_r_clk 46 | @200 47 | - 48 | - 49 | @24 50 | test.top_0.freq_bram_0.r_en 51 | test.top_0.freq_bram_0.r_clk 52 | @200 53 | - 54 | @24 55 | test.top_0.sdft_0.sample[7:0] 56 | @22 57 | test.top_0.state[3:0] 58 | @28 59 | [color] 5 60 | test.top_0.fft_read 61 | test.top_0.fft_ready 62 | test.top_0.fft_start 63 | @24 64 | test.top_0.sdft_0.cycles[15:0] 65 | @200 66 | - 67 | @22 68 | test.top_0.sdft_0.state[3:0] 69 | @28 70 | test.top_0.sdft_0.start 71 | test.top_0.sdft_0.ready 72 | @420 73 | test.top_0.sdft_0.\frequency_bins_real[0][19:0] 74 | @c00420 75 | test.top_0.sdft_0.\frequency_bins_imag[0][19:0] 76 | @28 77 | (0)test.top_0.sdft_0.\frequency_bins_imag[0][19:0] 78 | (1)test.top_0.sdft_0.\frequency_bins_imag[0][19:0] 79 | (2)test.top_0.sdft_0.\frequency_bins_imag[0][19:0] 80 | (3)test.top_0.sdft_0.\frequency_bins_imag[0][19:0] 81 | (4)test.top_0.sdft_0.\frequency_bins_imag[0][19:0] 82 | (5)test.top_0.sdft_0.\frequency_bins_imag[0][19:0] 83 | (6)test.top_0.sdft_0.\frequency_bins_imag[0][19:0] 84 | (7)test.top_0.sdft_0.\frequency_bins_imag[0][19:0] 85 | (8)test.top_0.sdft_0.\frequency_bins_imag[0][19:0] 86 | (9)test.top_0.sdft_0.\frequency_bins_imag[0][19:0] 87 | (10)test.top_0.sdft_0.\frequency_bins_imag[0][19:0] 88 | (11)test.top_0.sdft_0.\frequency_bins_imag[0][19:0] 89 | (12)test.top_0.sdft_0.\frequency_bins_imag[0][19:0] 90 | (13)test.top_0.sdft_0.\frequency_bins_imag[0][19:0] 91 | (14)test.top_0.sdft_0.\frequency_bins_imag[0][19:0] 92 | (15)test.top_0.sdft_0.\frequency_bins_imag[0][19:0] 93 | (16)test.top_0.sdft_0.\frequency_bins_imag[0][19:0] 94 | (17)test.top_0.sdft_0.\frequency_bins_imag[0][19:0] 95 | (18)test.top_0.sdft_0.\frequency_bins_imag[0][19:0] 96 | (19)test.top_0.sdft_0.\frequency_bins_imag[0][19:0] 97 | @1401200 98 | -group_end 99 | @420 100 | test.top_0.sdft_0.\frequency_bins_real[1][19:0] 101 | test.top_0.sdft_0.\frequency_bins_imag[1][19:0] 102 | test.top_0.sdft_0.\frequency_bins_real[2][19:0] 103 | test.top_0.sdft_0.\frequency_bins_imag[2][19:0] 104 | test.top_0.sdft_0.\frequency_bins_real[3][19:0] 105 | test.top_0.sdft_0.\frequency_bins_imag[3][19:0] 106 | test.top_0.sdft_0.bin_out_real[19:0] 107 | test.top_0.sdft_0.bin_out_imag[19:0] 108 | @200 109 | - 110 | @24 111 | test.top_0.freq_bram_0.\ram[0][19:0] 112 | test.top_0.freq_bram_0.\ram[1][19:0] 113 | test.top_0.freq_bram_0.\ram[2][19:0] 114 | test.top_0.freq_bram_0.\ram[3][19:0] 115 | test.top_0.freq_bram_0.\ram[4][19:0] 116 | test.top_0.freq_bram_0.\ram[5][19:0] 117 | test.top_0.freq_bram_0.\ram[6][19:0] 118 | [pattern_trace] 1 119 | [pattern_trace] 0 120 | -------------------------------------------------------------------------------- /python/read_vcd.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from pprint import pprint; 3 | from Verilog_VCD import parse_vcd 4 | import struct 5 | import sys 6 | from parse_verilog_header import ParseParams 7 | 8 | if len(sys.argv) != 3: 9 | exit("give vcd as first arg, params as 2nd") 10 | 11 | params = ParseParams(sys.argv[2]).parse() 12 | 13 | N = params['freq_bins'] 14 | data_width = params['data_width'] 15 | freq_d_width = params['freq_data_w'] 16 | 17 | 18 | print("N: %d, data width: %d, freq width %d" % (N, data_width, freq_d_width)) 19 | 20 | vcd = parse_vcd(sys.argv[1]) 21 | 22 | def twos_comp(val, bits): 23 | """compute the 2's complement of int value val""" 24 | if (val & (1 << (bits - 1))) != 0: # if sign bit is set e.g., 8bit: 128-255 25 | val = val - (1 << bits) # compute negative value 26 | return val # return positive value as is 27 | 28 | 29 | def fetch_data(name, bitlength = freq_d_width): 30 | for key in vcd.keys(): 31 | if name in vcd[key]['nets'][0]['name']: 32 | data = (vcd[key]['tv']) 33 | ints = [] 34 | for d in data: 35 | ints.append(twos_comp(int(d[1],2), freq_d_width)) 36 | return ints 37 | 38 | 39 | reals = [] 40 | imags = [] 41 | ram = [] 42 | for i in range(N): 43 | real_name = 'frequency_bins_real[%d]' % i 44 | imag_name = 'frequency_bins_imag[%d]' % i 45 | ram_name = 'ram[%d]' % i 46 | reals.append(fetch_data(real_name)) 47 | imags.append(fetch_data(imag_name)) 48 | ram.append(fetch_data(ram_name)) 49 | 50 | # find longest set (these are VCD, so some may only have a limited number of entries 51 | hist_len = 0 52 | for i in range(N): 53 | if len(reals[i]) > hist_len: 54 | hist_len = len(reals[i]) 55 | if len(imags[i]) > hist_len: 56 | hist_len = len(imags[i]) 57 | 58 | # pad with last value if necessary 59 | for i in range(N): 60 | for p in range(hist_len-len(reals[i])): 61 | reals[i].append(reals[i][len(reals[i])-1]) 62 | for p in range(hist_len-len(imags[i])): 63 | imags[i].append(reals[i][len(imags[i])-1]) 64 | 65 | 66 | print("recovered %d sets of freq history" % hist_len) 67 | print("last set of bins:") 68 | for n in range(N): 69 | print("%3d: (%8d, %8dj), |%8d| %8d^2" % ( n, reals[n][hist_len-1], imags[n][hist_len-1], abs(complex(reals[n][hist_len-1], imags[n][hist_len-1])), pow(reals[n][hist_len-1],2) + pow(imags[n][hist_len-1],2) )) 70 | 71 | plot_last = True 72 | plot_all = False 73 | if plot_all or plot_last: 74 | import matplotlib.pyplot as plt 75 | fig = plt.figure() 76 | 77 | 78 | if plot_all: 79 | color = 0.0 80 | MAX_HIST = 20 81 | jumps = hist_len / MAX_HIST 82 | plot_num = 1 83 | # show the last MAX_HIST freq plots 84 | for h in range(0, MAX_HIST * jumps, jumps): 85 | print(h, plot_num) 86 | points = [] 87 | for n in range(N): 88 | points.append(abs(complex(reals[n][h], imags[n][h]))) 89 | print(points) 90 | ax = fig.add_subplot(MAX_HIST+1,1,plot_num) 91 | # ax.set_ylim([0,500]) 92 | ax.plot(range(N), points) 93 | plot_num += 1 94 | 95 | if plot_last: 96 | points = [] 97 | for n in range(N): 98 | points.append(abs(complex(reals[n][hist_len-1], imags[n][hist_len-1]))) 99 | plt.plot(range(N), points) 100 | 101 | # bram 102 | if ram[0] is not None: 103 | ram_len = len(ram[0]) 104 | points = [] 105 | for n in range(N): 106 | points.append(ram[n][ram_len-1]) 107 | plt.plot(range(N), points) 108 | 109 | if plot_all or plot_last: 110 | #ax.legend() 111 | plt.show() 112 | -------------------------------------------------------------------------------- /hdl/VgaSyncGen.v: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////////////////// 2 | // Company: Ridotech 3 | // Engineer: Juan Manuel Rico 4 | // 5 | // Create Date: 09:34:23 30/09/2017 6 | // Module Name: vga_controller 7 | // Description: Basic control for 640x480@72Hz VGA signal. 8 | // 9 | // Dependencies: 10 | // 11 | // Revision: 12 | // Revision 0.01 - File Created for Roland Coeurjoly (RCoeurjoly) in 640x480@85Hz. 13 | // Revision 0.02 - Change for 640x480@60Hz. 14 | // Revision 0.03 - Solved some mistakes. 15 | // Revision 0.04 - Change for 640x480@72Hz and output signals 'activevideo' 16 | // and 'px_clk'. 17 | // 18 | // Additional Comments: 19 | // 20 | ////////////////////////////////////////////////////////////////////////////////// 21 | module VgaSyncGen ( 22 | input wire clk, // Input clock: 12MHz 23 | output wire hsync, // Horizontal sync out 24 | output wire vsync, // Vertical sync out 25 | output reg [9:0] x_px, // X position for actual pixel. 26 | output reg [9:0] y_px, // Y position for actual pixel. 27 | output wire activevideo, 28 | output wire px_clk 29 | ); 30 | 31 | // Generated values for pixel clock of 31.5Mhz and 72Hz frame frecuency. 32 | // # icepll -i12 -o31.5 33 | // 34 | // F_PLLIN: 12.000 MHz (given) 35 | // F_PLLOUT: 31.500 MHz (requested) 36 | // F_PLLOUT: 31.500 MHz (achieved) 37 | // 38 | // FEEDBACK: SIMPLE 39 | // F_PFD: 12.000 MHz 40 | // F_VCO: 1008.000 MHz 41 | // 42 | // DIVR: 0 (4'b0000) 43 | // DIVF: 83 (7'b1010011) 44 | // DIVQ: 5 (3'b101) 45 | // 46 | // FILTER_RANGE: 1 (3'b001) 47 | // 48 | `ifdef __ICARUS__ 49 | assign px_clk = clk; 50 | `else 51 | SB_PLL40_CORE #(.FEEDBACK_PATH("SIMPLE"), 52 | .PLLOUT_SELECT("GENCLK"), 53 | .DIVR(4'b0000), 54 | .DIVF(7'b1010011), 55 | .DIVQ(3'b101), 56 | .FILTER_RANGE(3'b001) 57 | ) 58 | uut 59 | ( 60 | .REFERENCECLK(clk), 61 | .PLLOUTCORE(px_clk), 62 | .RESETB(1'b1), 63 | .BYPASS(1'b0) 64 | ); 65 | 66 | `endif 67 | 68 | /* 69 | http://www.epanorama.net/faq/vga2rgb/calc.html 70 | [*User-Defined_mode,(640X480)] 71 | PIXEL_CLK = 31500 72 | H_DISP = 640 73 | V_DISP = 480 74 | H_FPORCH = 24 75 | H_SYNC = 40 76 | H_BPORCH = 128 77 | V_FPORCH = 9 78 | V_SYNC = 3 79 | V_BPORCH = 28 80 | */ 81 | 82 | // Video structure constants. 83 | parameter activeHvideo = 640; // Width of visible pixels. 84 | parameter activeVvideo = 480; // Height of visible lines. 85 | parameter hfp = 24; // Horizontal front porch length. 86 | parameter hpulse = 40; // Hsync pulse length. 87 | parameter hbp = 128; // Horizontal back porch length. 88 | parameter vfp = 9; // Vertical front porch length. 89 | parameter vpulse = 3; // Vsync pulse length. 90 | parameter vbp = 28; // Vertical back porch length. 91 | parameter blackH = hfp + hpulse + hbp; // Hide pixels in one line. 92 | parameter blackV = vfp + vpulse + vbp; // Hide lines in one frame. 93 | parameter hpixels = blackH + activeHvideo; // Total horizontal pixels. 94 | parameter vlines = blackV + activeVvideo; // Total lines. 95 | 96 | // Registers for storing the horizontal & vertical counters. 97 | reg [9:0] hc; 98 | reg [9:0] vc; 99 | 100 | // Initial values. 101 | initial 102 | begin 103 | x_px <= 0; 104 | y_px <= 0; 105 | hc <= 0; 106 | vc <= 0; 107 | end 108 | 109 | // Counting pixels. 110 | always @(posedge px_clk) 111 | begin 112 | // Keep counting until the end of the line. 113 | if (hc < hpixels - 1) 114 | hc <= hc + 1; 115 | else 116 | // When we hit the end of the line, reset the horizontal 117 | // counter and increment the vertical counter. 118 | // If vertical counter is at the end of the frame, then 119 | // reset that one too. 120 | begin 121 | hc <= 0; 122 | if (vc < vlines - 1) 123 | vc <= vc + 1; 124 | else 125 | vc <= 0; 126 | end 127 | end 128 | 129 | // Generate sync pulses (active low) and active video. 130 | assign hsync = (hc >= hfp && hc < hfp + hpulse) ? 0:1; 131 | assign vsync = (vc >= vfp && vc < vfp + vpulse) ? 0:1; 132 | assign activevideo = (hc >= blackH && vc >= blackV) ? 1:0; 133 | 134 | // Generate color. 135 | always @(posedge px_clk) 136 | begin 137 | // First check if we are within vertical active video range. 138 | if (activevideo) 139 | begin 140 | x_px <= hc - blackH; 141 | y_px <= vc - blackV; 142 | end 143 | else 144 | // We are outside active video range so display black. 145 | begin 146 | x_px <= 0; 147 | y_px <= 0; 148 | end 149 | end 150 | endmodule 151 | -------------------------------------------------------------------------------- /hdl/top.v: -------------------------------------------------------------------------------- 1 | `default_nettype none 2 | 3 | module top ( 4 | input clk, 5 | input [7:0] adc, 6 | output [1:0] gpio, 7 | output adc_clk, 8 | output adc_shutdown, 9 | output hsync, 10 | output vsync, 11 | output vga_r, 12 | output vga_g, 13 | output vga_b 14 | 15 | ); 16 | 17 | `include "tests/localparams.vh" 18 | 19 | reg [6:0] update_counter = 0; // when this wraps we update the frequency bins 20 | reg [8:0] read_cycles = 0; // keep track of fft reads -> bram 21 | 22 | integer i; 23 | 24 | wire [9:0] x_px; 25 | wire [9:0] y_px; 26 | wire signed [freq_data_w-1:0] bin_out_imag; 27 | wire signed [freq_data_w-1:0] bin_out_real; 28 | wire fft_ready; 29 | wire fft_clk = px_clk; 30 | reg fft_start = 0; 31 | wire fft_read; // = 0; 32 | 33 | reg [7:0] sample = 0; 34 | reg [7:0] temp_sample = 0; 35 | 36 | sdft #( .data_width(data_width), .freq_bins(freq_bins), .freq_w(freq_data_w)) sdft_0(.clk (fft_clk), .sample(sample), .ready(fft_ready), .start(fft_start), .read(fft_read), .bin_out_real(bin_out_real), .bin_out_imag(bin_out_imag), .bin_addr(freq_bram_w_addr)); 37 | 38 | wire px_clk; 39 | wire activevideo; 40 | wire draw_bar; 41 | assign vga_g = activevideo && (draw_bar || x_px < 5); 42 | assign vga_r = activevideo && draw_bar; // not connected on the board at the mo 43 | assign vga_b = activevideo && draw_bar; 44 | 45 | VgaSyncGen vga_inst( .clk(clk), .hsync(hsync), .vsync(vsync), .x_px(x_px), .y_px(y_px), .px_clk(px_clk), .activevideo(activevideo)); 46 | 47 | reg [bin_addr_w-1:0] freq_bram_w_addr = 0; 48 | wire [bin_addr_w-1:0] freq_bram_r_addr; 49 | wire [freq_data_w-1:0] freq_bram_out; 50 | reg [freq_data_w-1:0] freq_bram_in = 0; 51 | reg freq_bram_w = 0; // write enable signal 52 | wire freq_bram_r; // read enable signal 53 | wire freq_bram_r_clk = px_clk; 54 | wire freq_bram_w_clk = px_clk; 55 | 56 | freq_bram #(.addr_w(bin_addr_w), .data_w(freq_data_w)) freq_bram_0(.w_clk(freq_bram_w_clk), .r_clk(freq_bram_r_clk), .w_en(freq_bram_w), .r_en(freq_bram_r), .d_in(freq_bram_in), .d_out(freq_bram_out), .r_addr(freq_bram_r_addr), .w_addr(freq_bram_w_addr)); 57 | 58 | /////////////////////////////////////////////////////////////// 59 | // adc 60 | assign adc_shutdown = 0; 61 | assign adc_clk = clk; 62 | always @(negedge adc_clk) 63 | temp_sample <= adc; 64 | 65 | /////////////////////////////////////////////////////////////// 66 | // 67 | // run the fft 68 | assign fft_read = (state == STATE_PROCESS) && fft_ready && ! activevideo; 69 | assign gpio[0] = (state == STATE_WAIT_START); // purple tracae 70 | assign gpio[1] = (state == STATE_WRITE_BRAM); // blue trace 71 | 72 | reg [3:0] state = STATE_WAIT_FFT; 73 | // sample data as fast as possible 74 | always @(posedge fft_clk) begin 75 | case(state) 76 | STATE_WAIT_FFT: begin 77 | if(fft_ready) begin 78 | sample <= temp_sample; 79 | fft_start <= 1'b1; 80 | state <= STATE_WAIT_START; 81 | end 82 | end 83 | 84 | STATE_WAIT_START: begin 85 | if(fft_ready == 0) 86 | state <= STATE_PROCESS; 87 | end 88 | 89 | STATE_PROCESS: begin 90 | fft_start <= 1'b0; 91 | if(fft_ready) begin 92 | update_counter <= update_counter + 1; 93 | if(update_counter == FFT_READ_CYCLES && ! activevideo) begin // read the next bank of frequency data into the bram 94 | update_counter <= 0; 95 | // read flag set by wire assignment instead to meet timing 96 | // fft_read <= 1'b1; 97 | state <= STATE_READ; 98 | end else 99 | state <= STATE_WAIT_FFT; 100 | end else 101 | state <= STATE_PROCESS; 102 | end 103 | 104 | STATE_READ: begin 105 | // store the squared bin value to BRAM 106 | read_cycles <= read_cycles + 1; 107 | freq_bram_in <= ((bin_out_real * bin_out_real) + (bin_out_imag * bin_out_imag)) >> 8; // some divider here 108 | //fft_read <= 1'b0; 109 | freq_bram_w <= 1'b1; 110 | state <= STATE_WRITE_BRAM; 111 | end 112 | 113 | STATE_WRITE_BRAM: begin 114 | freq_bram_w <= 1'b0; 115 | state <= STATE_WAIT_FFT; 116 | // increment the counter and wrap it 117 | freq_bram_w_addr <= freq_bram_w_addr + 1; 118 | if(freq_bram_w_addr == freq_bins -1) 119 | freq_bram_w_addr <= 0; 120 | end 121 | 122 | endcase 123 | end 124 | 125 | /////////////////////////////////////////////////////////////// 126 | // 127 | // draw the bars 128 | 129 | // bram addr is calculated from y_px 130 | assign freq_bram_r_addr = y_px / bar_height; 131 | // request new value at top of bar and left side of screen 132 | wire start_of_screen = y_px == 0 && x_px == 0 && activevideo; 133 | wire start_of_line = x_px == 0 && activevideo; 134 | assign freq_bram_r = bar_height_counter == 0 && start_of_line; 135 | // draw the bar if the x_px is below the frequency value 136 | assign draw_bar = x_px < freq_bram_out; 137 | reg [bar_height_counter_w:0] bar_height_counter = 0; 138 | 139 | // increment bar_height_counter every new line, reset to 0 at top of the screen and after every bar 140 | always@(posedge start_of_line) begin 141 | bar_height_counter <= bar_height_counter + 1; 142 | if(start_of_screen) 143 | bar_height_counter <= 0; 144 | else if(bar_height_counter == bar_height - 1) 145 | bar_height_counter <= 0; 146 | end 147 | 148 | endmodule 149 | -------------------------------------------------------------------------------- /docs/fpga_usage.md: -------------------------------------------------------------------------------- 1 | # parallel (current design) 2 | 3 | needs 1 complex multiply and 2 adds per bin. Then another complex multiply to 4 | get output (not thinking about sqrt ATM). with 16 bins, design needs 8k logic 5 | cells with complex output. 16k with with squared output. 6 | 7 | potentially could run at 100MHz, 50MHz largest freq. Each bin would cover 3MHz. 8 | 9 | # parallel w/multiplication lookup tables 10 | 11 | 8 bit sample and 8 bit coeff means 16bit lookup table 12 | 13 | 16 bin: 16 * 2 coeffs, 256 sample values = 8192 values in LUT * 16bit == 131k 14 | so could be just possible on ICE40 8k device 15 | 16 | 8 bin output with multipliers possible on 8k. 17 | 18 | still none of these will give modulo output. 19 | 20 | # serial processing 21 | 22 | another option is to process the bins serially, so only one complex multiply is 23 | needed. Much more likely to be able to fit a 100 bin FFT. However, this quickly 24 | reduces throughput. With 100mhz clock and 100 bin FFT, could theoretically 25 | process a new sample in 100 clocks, so max FFT frequency would be 500kHz, 5khz 26 | bins. 27 | 28 | # parallelised serial processing 29 | 30 | same idea as the above, but use all the spare logic cells for duplicating the 31 | complex multiplies. 32 | 1 bin squared output requires 1485 cells, uses 3 BRAMS for 2 * 127 * 8bit table 33 | 34 | Each additional complex multiply/accumulate takes around 400 cells 35 | So on an 8k device, reserving 500 cells for the serial logic, there will be 36 | about 6000 cells left, to make 16 MAs in total. 37 | 38 | So with 128 bins, we can process all the data in 8 cycles. At 100Mhz, we are 39 | then at about 10MHz throughput, or max frequency bin is 5Mhz, with each bin 40 | representing 40khz. 41 | 42 | # post PNR usage figures for parallel design 43 | 44 | 1k has 1280 logic cells, and 64kbit bram 45 | 8k has 7680 locic cells, and 128kbit bram 46 | 47 | ## serial processing 48 | 49 | 16 * 20 bit bins 50 | 2 * 16 * 16 bit coeffs 51 | 16 * 8 bit sample history 52 | 8 bit sample width 53 | 54 | After packing: 55 | IOs 18 / 206 56 | GBs 0 / 8 57 | GB_IOs 0 / 8 58 | LCs 4773 / 7680 59 | DFF 762 60 | CARRY 444 61 | CARRY, DFF 42 62 | DFF PASS 692 63 | CARRY PASS 41 64 | BRAMs 5 / 32 65 | WARMBOOTs 0 / 1 66 | PLLs 1 / 2 67 | 68 | timing estimate 18Mhz 69 | 70 | Tried pipelining the multiplies for the bram write in top. Makes no difference to timing estimate. 71 | Tried pipelining the multiplies in the sdft loop, also made no difference to timing estimate. 72 | 73 | ## complex output 74 | 75 | 16 * 16 bit bins, 76 | 2 * 16 * 16 bit coeffs 77 | 16 * 16 bit sample history 78 | output is just 16 * bins.imag (no module) 79 | 80 | After packing: 81 | IOs 16 / 96 82 | GBs 0 / 8 83 | GB_IOs 0 / 8 84 | LCs 7579 / 1280 85 | DFF 99 86 | CARRY 800 87 | CARRY, DFF 248 88 | DFF PASS 31 89 | CARRY PASS 68 90 | BRAMs 1 / 16 91 | WARMBOOTs 0 / 1 92 | PLLs 1 / 1 93 | 94 | # squared output 95 | 96 | 16 * 16 bit bins, 97 | 2 * 16 * 16 bit coeffs 98 | 16 * 16 bit sample history 99 | with squared output 100 | 101 | After packing: 102 | IOs 16 / 96 103 | GBs 0 / 8 104 | GB_IOs 0 / 8 105 | LCs 15646 / 1280 106 | DFF 100 107 | CARRY 1027 108 | CARRY, DFF 265 109 | DFF PASS 31 110 | CARRY PASS 65 111 | BRAMs 1 / 16 112 | WARMBOOTs 0 / 1 113 | PLLs 1 / 1 114 | 115 | # 4 bin squared output 116 | 117 | 16 * 16 bit bins, 118 | 2 * 16 * 16 bit coeffs 119 | 16 * 16 bit sample history 120 | with squared output but only for 4 bins 121 | 122 | After packing: 123 | IOs 16 / 96 124 | GBs 0 / 8 125 | GB_IOs 0 / 8 126 | LCs 7579 / 1280 127 | DFF 99 128 | CARRY 800 129 | CARRY, DFF 248 130 | DFF PASS 31 131 | CARRY PASS 68 132 | BRAMs 1 / 16 133 | WARMBOOTs 0 / 1 134 | PLLs 1 / 1 135 | 136 | # 8 bin squared output 137 | 138 | 16 * 16 bit bins, 139 | 2 * 16 * 16 bit coeffs 140 | 16 * 16 bit sample history 141 | with squared output but only for 8 bins 142 | 143 | After packing: 144 | IOs 16 / 96 145 | GBs 0 / 8 146 | GB_IOs 0 / 8 147 | LCs 7708 / 1280 148 | DFF 67 149 | CARRY 562 150 | CARRY, DFF 154 151 | DFF PASS 31 152 | CARRY PASS 43 153 | BRAMs 1 / 16 154 | WARMBOOTs 0 / 1 155 | PLLs 1 / 1 156 | 157 | # 1 bin squared output 158 | 159 | 16 * 16 bit bins, 160 | 2 * 16 * 16 bit coeffs 161 | 16 * 16 bit sample history 162 | with squared output but only for 1 bins 163 | 164 | After packing: 165 | IOs 16 / 96 166 | GBs 0 / 8 167 | GB_IOs 0 / 8 168 | LCs 1034 / 1280 169 | DFF 42 170 | CARRY 150 171 | CARRY, DFF 42 172 | DFF PASS 27 173 | CARRY PASS 23 174 | BRAMs 0 / 16 175 | WARMBOOTs 0 / 1 176 | PLLs 1 / 1 177 | 178 | # 1 bin squared output 179 | 180 | 16 * 16 bit bins 181 | 2 * 16 * 8 bit coeffs 182 | 16 * 8 bit sample history 183 | with squared output but only for 1 bins 184 | bram for coeffs 185 | 186 | After packing: 187 | IOs 16 / 96 188 | GBs 0 / 8 189 | GB_IOs 0 / 8 190 | LCs 1485 / 1280 191 | DFF 308 192 | CARRY 141 193 | CARRY, DFF 26 194 | DFF PASS 295 195 | CARRY PASS 23 196 | BRAMs 3 / 16 197 | WARMBOOTs 0 / 1 198 | PLLs 1 / 1 199 | 200 | ## with 2 multiplies for fft and squaring output 201 | 202 | IOs 16 / 96 203 | GBs 0 / 8 204 | GB_IOs 0 / 8 205 | LCs 2154 / 1280 206 | DFF 308 207 | CARRY 174 208 | CARRY, DFF 26 209 | DFF PASS 231 210 | CARRY PASS 25 211 | BRAMs 3 / 16 212 | WARMBOOTs 0 / 1 213 | PLLs 1 / 1 214 | 215 | ## with 4 mulitplies for fft and squaring output 216 | IOs 16 / 96 217 | GBs 0 / 8 218 | GB_IOs 0 / 8 219 | LCs 3738 / 1280 220 | DFF 308 221 | CARRY 262 222 | CARRY, DFF 26 223 | DFF PASS 167 224 | CARRY PASS 28 225 | BRAMs 3 / 16 226 | WARMBOOTs 0 / 1 227 | PLLs 1 / 1 228 | 229 | -------------------------------------------------------------------------------- /hdl/sdft.v: -------------------------------------------------------------------------------- 1 | `default_nettype none 2 | /* 3 | SDFT module: sliding DFT. See http://www.comm.toronto.edu/~dimitris/ece431/slidingdft.pdf for more details. 4 | The SDFT is not as efficient as an FFT to calculate an entire range, but can have arbitary numbers of bins 5 | (not powers of 2), and single bins can be calculated, so in some cases it can be more efficient. 6 | 7 | The module has been tested against the Python numpy fft routine. 8 | 9 | Interface: 10 | 11 | clk input clock. everything is done on rising edge. 12 | sample input data. Width is set with data_width parameter 13 | start start the process. If you have 32 bins, this will take 98 clocks (3 clocks per bin + 2 setup) 14 | read when in idle will put the contents of the given frequency bin on bin_out_real and bin_out_imag 15 | bin_addr which bin to read - value from 0 to bins-1 16 | ready when sdft is in idle state - can now start or read 17 | 18 | Parameters: 19 | 20 | data_width how wide the input sample data is 21 | freq_bins how many bins to calculate 22 | freq_w how wide the frequency data should be 23 | FILE_REAL real twiddle factors for each bin 24 | FILE_IMAG imag twiddle factors for each bin. See python/gen_twiddle.py for generation of these files. 25 | 26 | */ 27 | module sdft 28 | #( 29 | parameter data_width = 8, 30 | parameter freq_bins = 16, 31 | parameter freq_w = 20, // to prevent overflow 32 | parameter FILE_REAL = "hdl/twiddle_real.list", 33 | parameter FILE_IMAJ = "hdl/twiddle_imag.list" 34 | ) 35 | ( 36 | input wire clk, 37 | input wire [data_width-1:0] sample, 38 | input wire start, 39 | input wire read, 40 | input wire [bin_addr_w-1:0] bin_addr, 41 | 42 | output reg signed [freq_w-1:0] bin_out_real, 43 | output reg signed [freq_w-1:0] bin_out_imag, 44 | output wire ready 45 | ); 46 | 47 | 48 | initial begin 49 | bin_out_real <= 0; 50 | bin_out_imag <= 0; 51 | end 52 | 53 | reg [15:0] cycles = 0; 54 | 55 | // width of addr needed to address the frequency bins 56 | localparam bin_addr_w = $clog2(freq_bins); 57 | 58 | // register for the twiddle factor ROM 59 | reg [bin_addr_w-1:0] tw_addr; 60 | 61 | // register for sample index 62 | reg [bin_addr_w-1:0] sample_index; 63 | 64 | // twiddle factor ROM 65 | wire signed [data_width-1:0] twid_real; 66 | wire signed [data_width-1:0] twid_imag; 67 | twiddle_rom #(.addr_w(bin_addr_w), .data_w(data_width)) twiddle_rom_0(.clk(clk), .addr(tw_addr), .dout_real(twid_real), .dout_imag(twid_imag)); 68 | 69 | // complex mult as a module 70 | /* 71 | wire signed [data_width*2-1:0] complex_mult_out_real; 72 | wire signed [data_width*2-1:0] complex_mult_out_imag; 73 | 74 | wire signed [data_width:0] complex_mult_in_a_real; // one extra bit for handling subtraction of delta 75 | wire signed [data_width:0] complex_mult_in_a_imag; 76 | 77 | complex_mult #(.data_in_w(data_width+1), .data_out_w(data_width*2)) complex_mult_0(.a_real(complex_mult_in_a_real), .a_imag(complex_mult_in_a_imag), .b_real(twid_real), .b_imag(twid_imag), .out_real(complex_mult_out_real), .out_imag(complex_mult_out_imag)); 78 | 79 | */ 80 | // frequency bins RAM - double width + 2 to handle multiply 81 | reg signed [freq_w-1:0] frequency_bins_real [freq_bins-1:0]; 82 | reg signed [freq_w-1:0] frequency_bins_imag [freq_bins-1:0]; 83 | 84 | // sample storage 85 | reg [data_width-1:0] samples [freq_bins-1:0]; 86 | 87 | // delta storage (1 more than data_width to handle subtraction) 88 | reg signed [data_width:0] delta; 89 | 90 | integer j; 91 | initial begin 92 | tw_addr = 0; 93 | sample_index = 0; 94 | delta = 0; 95 | for(j = 0; j < freq_bins; j = j + 1) begin 96 | samples[j] = 0; 97 | frequency_bins_real[j] = 0; 98 | frequency_bins_imag[j] = 0; 99 | end 100 | end 101 | 102 | 103 | 104 | localparam STATE_WAIT = 0; 105 | localparam STATE_START = 1; 106 | localparam STATE_READ = 2; 107 | localparam STATE_LOAD_ROM = 3; 108 | localparam STATE_WAIT_ROM = 4; 109 | localparam STATE_CALC = 5; 110 | localparam STATE_FINISH = 6; 111 | 112 | reg [3:0] state = STATE_WAIT; 113 | /* 114 | assign complex_mult_in_a_real = frequency_bins_real[tw_addr] + delta; 115 | assign complex_mult_in_a_imag = frequency_bins_imag[tw_addr]; // imag component 116 | */ 117 | assign ready = (state == STATE_WAIT) ? 1'b1 : 1'b0; 118 | 119 | always@(posedge clk) begin 120 | case(state) 121 | STATE_WAIT: begin 122 | if(start) 123 | state <= STATE_START; 124 | if(read) 125 | state <= STATE_READ; 126 | end 127 | 128 | STATE_READ: begin 129 | bin_out_real <= frequency_bins_real[bin_addr]; 130 | bin_out_imag <= frequency_bins_imag[bin_addr]; 131 | state <= STATE_WAIT; 132 | 133 | end 134 | 135 | STATE_START: begin 136 | cycles <= cycles + 1; // keep track of how many cycles 137 | // get delta: newest - oldest 138 | delta <= sample - samples[sample_index]; 139 | // store new sample 140 | samples[sample_index] <= sample; 141 | tw_addr <= 0; 142 | state <= STATE_CALC; 143 | end 144 | 145 | STATE_LOAD_ROM: begin // 2 146 | tw_addr <= tw_addr + 1; 147 | if(tw_addr == freq_bins -1) begin 148 | tw_addr <= 0; 149 | state <= STATE_FINISH; 150 | end else 151 | state <= STATE_WAIT_ROM; 152 | end 153 | 154 | STATE_WAIT_ROM: begin // 3 155 | state <= STATE_CALC; 156 | end 157 | 158 | STATE_CALC: begin // 4 159 | frequency_bins_real[tw_addr] <= ((frequency_bins_real[tw_addr] + delta) * twid_real - (frequency_bins_imag[tw_addr] * twid_imag)) >>> 7; 160 | frequency_bins_imag[tw_addr] <= ((frequency_bins_real[tw_addr] + delta) * twid_imag + (frequency_bins_imag[tw_addr] * twid_real)) >>> 7; 161 | state <= STATE_LOAD_ROM; 162 | end 163 | 164 | STATE_FINISH: begin 165 | // increment sample index (same as rotating) 166 | sample_index <= sample_index + 1; 167 | // reset index if it wraps 168 | if(sample_index == freq_bins) 169 | sample_index <= 0; 170 | state <= STATE_WAIT; 171 | end 172 | 173 | endcase 174 | end 175 | 176 | 177 | endmodule 178 | 179 | -------------------------------------------------------------------------------- /python/Verilog_VCD.py: -------------------------------------------------------------------------------- 1 | # This is a manual translation, from perl to python, of : 2 | # http://cpansearch.perl.org/src/GSULLIVAN/Verilog-VCD-0.03/lib/Verilog/VCD.pm 3 | 4 | import re 5 | 6 | global timescale 7 | global endtime 8 | 9 | 10 | # our local exception for VCD parsing errors (inherited from Exception) 11 | class VCDParseError(Exception): 12 | pass 13 | 14 | 15 | def list_sigs(file): 16 | """Parse input VCD file into data structure, 17 | then return just a list of the signal names.""" 18 | 19 | vcd = parse_vcd(file, only_sigs=1) 20 | 21 | sigs = [] 22 | for k in vcd.keys(): 23 | v = vcd[k] 24 | nets = v['nets'] 25 | sigs.extend( n['hier']+'.'+n['name'] for n in nets ) 26 | 27 | return sigs 28 | 29 | 30 | def parse_vcd(file, only_sigs=0, use_stdout=0, siglist=[], opt_timescale=''): 31 | """Parse input VCD file into data structure. 32 | Also, print t-v pairs to STDOUT, if requested.""" 33 | 34 | global endtime 35 | 36 | usigs = {} 37 | for i in siglist: 38 | usigs[i] = 1 39 | 40 | if len(usigs): 41 | all_sigs = 0 42 | else: 43 | all_sigs = 1 44 | 45 | data = {} 46 | mult = 0 47 | num_sigs = 0 48 | hier = [] 49 | time = 0 50 | 51 | with open(file, 'r') as fh: 52 | while True: 53 | line = fh.readline() 54 | if line == '': # EOF 55 | break 56 | 57 | # chomp 58 | # s/ ^ \s+ //x 59 | line = line.strip() 60 | 61 | # if nothing left after we strip whitespace, go to next line 62 | if line == '': 63 | continue 64 | 65 | # put most frequent lines encountered at start of if/elif, so other 66 | # clauses usually don't need to be tested 67 | if line[0] in ('b', 'B', 'r', 'R'): 68 | (value,code) = line[1:].split() 69 | if (code in data): 70 | if (use_stdout): 71 | print( time, value ) 72 | else: 73 | if 'tv' not in data[code]: 74 | data[code]['tv'] = [] 75 | data[code]['tv'].append( (time, value) ) 76 | 77 | elif line[0] in ('0', '1', 'x', 'X', 'z', 'Z'): 78 | value = line[0] 79 | code = line[1:] 80 | if (code in data): 81 | if (use_stdout): 82 | print( time, value ) 83 | else: 84 | if 'tv' not in data[code]: 85 | data[code]['tv'] = [] 86 | data[code]['tv'].append( (time, value) ) 87 | 88 | elif line[0]=='#': 89 | time = mult * int(line[1:]) 90 | endtime = time 91 | 92 | elif "$enddefinitions" in line: 93 | num_sigs = len(data) 94 | if (num_sigs == 0): 95 | if (all_sigs): 96 | VCDParseError("Error: No signals were found in the "\ 97 | "VCD file "+file+". Check the VCD file for "\ 98 | "proper var syntax.") 99 | 100 | else: 101 | VCDParseError("Error: No matching signals were found "\ 102 | "in the VCD file "+file+". Use list_sigs to "\ 103 | "view all signals in the VCD file.") 104 | 105 | if ((num_sigs>1) and use_stdout): 106 | VCDParseError("Error: There are too many signals "\ 107 | "(num_sigs) for output to STDOUT. Use list_sigs "\ 108 | "to select a single signal.") 109 | 110 | if only_sigs: 111 | break 112 | 113 | elif "$timescale" in line: 114 | statement = line 115 | if not "$end" in line: 116 | while fh: 117 | line = fh.readline() 118 | statement += line 119 | if "$end" in line: 120 | break 121 | 122 | mult = calc_mult(statement, opt_timescale) 123 | 124 | elif "$scope" in line: 125 | # assumes all on one line 126 | # $scope module dff end 127 | hier.append( line.split()[2] ) # just keep scope name 128 | 129 | elif "$upscope" in line: 130 | hier.pop() 131 | 132 | elif "$var" in line: 133 | # assumes all on one line: 134 | # $var reg 1 *@ data $end 135 | # $var wire 4 ) addr [3:0] $end 136 | ls = line.split() 137 | type = ls[1] 138 | size = ls[2] 139 | code = ls[3] 140 | name = "".join(ls[4:-1]) 141 | path = '.'.join(hier) 142 | full_name = path + '.' + name 143 | if (full_name in usigs) or all_sigs: 144 | if code not in data: 145 | data[code] = {} 146 | if 'nets' not in data[code]: 147 | data[code]['nets'] = [] 148 | var_struct = { 149 | 'type' : type, 150 | 'name' : name, 151 | 'size' : size, 152 | 'hier' : path, 153 | } 154 | if var_struct not in data[code]['nets']: 155 | data[code]['nets'].append( var_struct ) 156 | 157 | fh.close() 158 | 159 | return data 160 | 161 | 162 | def calc_mult (statement, opt_timescale=''): 163 | """ 164 | Calculate a new multiplier for time values. 165 | Input statement is complete timescale, for example: 166 | timescale 10ns end 167 | Input new_units is one of s|ms|us|ns|ps|fs. 168 | Return numeric multiplier. 169 | Also sets the package timescale variable. 170 | """ 171 | 172 | global timescale 173 | 174 | fields = statement.split() 175 | fields.pop() # delete end from array 176 | fields.pop(0) # delete timescale from array 177 | tscale = ''.join(fields) 178 | 179 | new_units = '' 180 | if (opt_timescale != ''): 181 | new_units = opt_timescale.lower() 182 | new_units = re.sub(r"\s", '', new_units) 183 | timescale = "1"+new_units 184 | 185 | else: 186 | timescale = tscale 187 | return 1 188 | 189 | 190 | mult = 0 191 | units = 0 192 | ts_match = re.match(r"(\d+)([a-z]+)", tscale) 193 | if ts_match: 194 | mult = int(ts_match.group(1)) 195 | units = ts_match.group(2).lower() 196 | 197 | else: 198 | VCDParseError("Error: Unsupported timescale found in VCD "\ 199 | "file: "+tscale+". Refer to the Verilog LRM.") 200 | 201 | 202 | mults = { 203 | 'fs' : 1e-15, 204 | 'ps' : 1e-12, 205 | 'ns' : 1e-09, 206 | 'us' : 1e-06, 207 | 'ms' : 1e-03, 208 | 's' : 1e-00, 209 | } 210 | mults_keys = mults.keys() 211 | mults_keys.sort(key=lambda x : mults[x]) 212 | usage = '|'.join(mults_keys) 213 | 214 | scale = 0 215 | if units in mults: 216 | scale = mults[units] 217 | 218 | else: 219 | VCDParseError("Error: Unsupported timescale units found in VCD "\ 220 | "file: "+units+". Supported values are: "+usage) 221 | 222 | 223 | new_scale = 0 224 | if new_units in mults: 225 | new_scale = mults[new_units] 226 | 227 | else: 228 | VCDParseError("Error: Illegal user-supplied "\ 229 | "timescale: "+new_units+". Legal values are: "+usage) 230 | 231 | 232 | return ((mult * scale) / new_scale) 233 | 234 | 235 | def get_timescale(): 236 | return timescale 237 | 238 | 239 | def get_endtime(): 240 | return endtime 241 | 242 | 243 | 244 | # =head1 NAME 245 | # 246 | # Verilog_VCD - Parse a Verilog VCD text file 247 | # 248 | # =head1 VERSION 249 | # 250 | # This document refers to Verilog::VCD version 1.10. 251 | # 252 | # =head1 SYNOPSIS 253 | # 254 | # from Verilog_VCD import parse_vcd 255 | # vcd = parse_vcd('/path/to/some.vcd') 256 | # 257 | # =head1 DESCRIPTION 258 | # 259 | # Verilog is a Hardware Description Language (HDL) used to model digital logic. 260 | # While simulating logic circuits, the values of signals can be written out to 261 | # a Value Change Dump (VCD) file. This module can be used to parse a VCD file 262 | # so that further analysis can be performed on the simulation data. The entire 263 | # VCD file can be stored in a Python data structure and manipulated using 264 | # standard hash and array operations. This module is also a good helper for 265 | # parsing fsdb files, since you can run fsd2vcd(part of the novas installation) 266 | # to convert them to the vcd format and then use this module. 267 | # 268 | # =head2 Input File Syntax 269 | # 270 | # The syntax of the VCD text file is described in the documentation of 271 | # the IEEE standard for Verilog. Only the four-state VCD format is supported. 272 | # The extended VCD format (with strength information) is not supported. 273 | # Since the input file is assumed to be legal VCD syntax, only minimal 274 | # validation is performed. 275 | # 276 | # =head1 SUBROUTINES 277 | # 278 | # 279 | # =head2 parse_vcd(file, $opt_ref) 280 | # 281 | # Parse a VCD file and return a reference to a data structure which 282 | # includes hierarchical signal definitions and time-value data for all 283 | # the specified signals. A file name is required. By default, all 284 | # signals in the VCD file are included, and times are in units 285 | # specified by the C<$timescale> VCD keyword. 286 | # 287 | # vcd = parse_vcd('/path/to/some.vcd') 288 | # 289 | # It returns a reference to a nested data structure. The top of the 290 | # structure is a Hash-of-Hashes. The keys to the top hash are the VCD 291 | # identifier codes for each signal. The following is an example 292 | # representation of a very simple VCD file. It shows one signal named 293 | # C, whose VCD code is C<+>. The time-value pairs 294 | # are stored as an Array-of-Tuples, referenced by the C key. The 295 | # time is always the first number in the pair, and the times are stored in 296 | # increasing order in the array. 297 | # 298 | # { 299 | # '+' : { 300 | # 'tv' : [ 301 | # ( 302 | # 0, 303 | # '1' 304 | # ), 305 | # ( 306 | # 12, 307 | # '0' 308 | # ), 309 | # ], 310 | # 'nets' : [ 311 | # { 312 | # 'hier' : 'chip.cpu.alu.', 313 | # 'name' : 'clk', 314 | # 'type' : 'reg', 315 | # 'size' : '1' 316 | # } 317 | # ] 318 | # } 319 | # } 320 | # 321 | # Since each code could have multiple hierarchical signal names, the names are 322 | # stored as an Array-of-Hashes, referenced by the C key. The example above 323 | # only shows one signal name for the code. 324 | # 325 | # 326 | # =head3 OPTIONS 327 | # 328 | # Options to C should be passed as a hash reference. 329 | # 330 | # =over 4 331 | # 332 | # =item timescale 333 | # 334 | # It is possible to scale all times in the VCD file to a desired timescale. 335 | # To specify a certain timescale, such as nanoseconds: 336 | # 337 | # vcd = parse_vcd(file, opt_timescale='ns'}) 338 | # 339 | # Valid timescales are: 340 | # 341 | # s ms us ns ps fs 342 | # 343 | # =item siglist 344 | # 345 | # If only a subset of the signals included in the VCD file are needed, 346 | # they can be specified by a signal list passed as an array reference. 347 | # The signals should be full hierarchical paths separated by the dot 348 | # character. For example: 349 | # 350 | # signals = [ 351 | # 'top.chip.clk', 352 | # 'top.chip.cpu.alu.status', 353 | # 'top.chip.cpu.alu.sum[15:0]', 354 | # ] 355 | # vcd = parse_vcd(file, siglist=signals) 356 | # 357 | # Limiting the number of signals can substantially reduce memory usage of the 358 | # returned data structure because only the time-value data for the selected 359 | # signals is loaded into the data structure. 360 | # 361 | # =item use_stdout 362 | # 363 | # It is possible to print time-value pairs directly to STDOUT for a 364 | # single signal using the C option. If the VCD file has 365 | # more than one signal, the C option must also be used, and there 366 | # must only be one signal specified. For example: 367 | # 368 | # vcd = parse_vcd(file, 369 | # use_stdout=1, 370 | # siglist=['top.clk'] 371 | # ) 372 | # 373 | # The time-value pairs are output as space-separated tokens, one per line. 374 | # For example: 375 | # 376 | # 0 x 377 | # 15 0 378 | # 277 1 379 | # 500 0 380 | # 381 | # Times are listed in the first column. 382 | # Times units can be controlled by the C option. 383 | # 384 | # =item only_sigs 385 | # 386 | # Parse a VCD file and return a reference to a data structure which 387 | # includes only the hierarchical signal definitions. Parsing stops once 388 | # all signals have been found. Therefore, no time-value data are 389 | # included in the returned data structure. This is useful for 390 | # analyzing signals and hierarchies. 391 | # 392 | # vcd = parse_vcd(file, only_sigs=1) 393 | # 394 | # =back 395 | # 396 | # 397 | # =head2 list_sigs(file) 398 | # 399 | # Parse a VCD file and return a list of all signals in the VCD file. 400 | # Parsing stops once all signals have been found. This is 401 | # helpful for deciding how to limit what signals are parsed. 402 | # 403 | # Here is an example: 404 | # 405 | # signals = list_sigs('input.vcd') 406 | # 407 | # The signals are full hierarchical paths separated by the dot character 408 | # 409 | # top.chip.cpu.alu.status 410 | # top.chip.cpu.alu.sum[15:0] 411 | # 412 | # =head2 get_timescale( ) 413 | # 414 | # This returns a string corresponding to the timescale as specified 415 | # by the C<$timescale> VCD keyword. It returns the timescale for 416 | # the last VCD file parsed. If called before a file is parsed, it 417 | # returns an undefined value. If the C C option 418 | # was used to specify a timescale, the specified value will be returned 419 | # instead of what is in the VCD file. 420 | # 421 | # vcd = parse_vcd(file); # Parse a file first 422 | # ts = get_timescale(); # Then query the timescale 423 | # 424 | # =head2 get_endtime( ) 425 | # 426 | # This returns the last time found in the VCD file, scaled 427 | # appropriately. It returns the last time for the last VCD file parsed. 428 | # If called before a file is parsed, it returns an undefined value. 429 | # 430 | # vcd = parse_vcd(file); # Parse a file first 431 | # et = get_endtime(); # Then query the endtime 432 | # 433 | # =head1 EXPORT 434 | # 435 | # Nothing is exported by default. Functions may be exported individually, or 436 | # all functions may be exported at once, using the special tag C<:all>. 437 | # 438 | # =head1 DIAGNOSTICS 439 | # 440 | # Error conditions cause the program to raise an Exception. 441 | # 442 | # =head1 LIMITATIONS 443 | # 444 | # Only the following VCD keywords are parsed: 445 | # 446 | # $end $scope 447 | # $enddefinitions $upscope 448 | # $timescale $var 449 | # 450 | # The extended VCD format (with strength information) is not supported. 451 | # 452 | # The default mode of C is to load the entire VCD file into the 453 | # data structure. This could be a problem for huge VCD files. The best solution 454 | # to any memory problem is to plan ahead and keep VCD files as small as possible. 455 | # When simulating, dump fewer signals and scopes, and use shorter dumping 456 | # time ranges. Another technique is to parse only a small list of signals 457 | # using the C option; this method only loads the desired signals into 458 | # the data structure. Finally, the C option will parse the input VCD 459 | # file line-by-line, instead of loading it into the data structure, and directly 460 | # prints time-value data to STDOUT. The drawback is that this only applies to 461 | # one signal. 462 | # 463 | # =head1 BUGS 464 | # 465 | # There are no known bugs in this module. 466 | # 467 | # =head1 SEE ALSO 468 | # 469 | # Refer to the following Verilog documentation: 470 | # 471 | # IEEE Standard for Verilog (c) Hardware Description Language 472 | # IEEE Std 1364-2005 473 | # Section 18.2, "Format of four-state VCD file" 474 | # 475 | # =head1 AUTHOR 476 | # 477 | # Originally written in Perl by Gene Sullivan (gsullivan@cpan.org) 478 | # Translated into Python by Sameer Gauria (sgauria+python@gmail.com) 479 | # 480 | # Plus the following patches : 481 | # - Scott Chin : Handle upper-case values in VCD file. 482 | # - Sylvain Guilley : Fixed bugs in list_sigs. 483 | # - Bogdan Tabacaru : Fix bugs in globalness of timescale and endtime 484 | # - Andrew Becker : Fix bug in list_sigs 485 | # - Pablo Madoery : Found bugs in siglist and opt_timescale features. 486 | # - Matthew Clapp itsayellow+dev@gmail.com : Performance speedup, Exception, print, open, etc cleanup to make the code more robust. 487 | # Thanks! 488 | # 489 | # =head1 COPYRIGHT AND LICENSE 490 | # 491 | # Copyright (c) 2012 Gene Sullivan, Sameer Gauria. All rights reserved. 492 | # 493 | # This module is free software; you can redistribute it and/or modify 494 | # it under the same terms as Perl itself. See L. 495 | # 496 | # =cut 497 | 498 | --------------------------------------------------------------------------------