├── .gitattributes
├── src
    ├── tmds_output.pio
    ├── vsync.pio
    ├── lcd_cap_9bpp.pio
    ├── pico_pll_example.c
    ├── pico_multicore_example.c
    ├── vga_output_9bpp.pio
    ├── tmds_encode.S
    ├── lcd_cap_15bpp_mux.pio
    └── out_dma_manager.S
├── README.md
├── scripts
    ├── tmds_util.h
    ├── tmds_util_colorcor.c
    └── tmds_util.c
└── docs
    └── DOCUMENTATION.md


/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 


--------------------------------------------------------------------------------
/src/tmds_output.pio:
--------------------------------------------------------------------------------
 1 | // TMDS output
 2 | // OSR: shift to right, autopull, threshold 32(?)
 3 | // Subject to change; output method "borrowed" from PicoDVI.
 4 | 
 5 | .program tmds_output // Two-instruction serializer: every instruction consumes one OSR bit
 6 | .side_set 2 // Each instruction carries a 2-bit side-set value
 7 | .origin 0 // Loaded at address 0 so the 1-bit PC values 0 and 1 select the two instructions below
 8 | 
 9 | 	out pc, 1 side 0b10 // Address 0: side-set pins = 0b10, then the next OSR bit becomes the PC (0 or 1)
10 | 	out pc, 1 side 0b01 // Address 1: side-set pins = 0b01, then the next OSR bit becomes the PC (0 or 1)
11 | 


--------------------------------------------------------------------------------
/src/vsync.pio:
--------------------------------------------------------------------------------
 1 | // Vertical sync capture re-sync code
 2 | // Sends something (NULL data or all $FF at this point) through the FIFO to initiate a DMA transfer to reset the input buffer position.
 3 | // GPIO pin currently undefined, currently referred to with alias 'V'.
 4 | // I forget what the vsync pulse polarity is, so the wait pin may need to be changed.
 5 | .program vsync_interruptor // Pushes one word into the RX FIFO per low-to-high transition on the watched pin
 6 | 
 7 | public entry_point:
 8 | .wrap_target
 9 | 	wait 0 gpio V // Stall until the pin reads low ('V' is a placeholder, not a defined symbol yet)
10 | 	wait 1 gpio V // ...then until it reads high, i.e. a rising edge has occurred
11 | 	in NULL, 32 // Shift 32 zero bits into the ISR
12 | 	push // Move the ISR contents to the RX FIFO
13 | .wrap
14 | 


--------------------------------------------------------------------------------
/src/lcd_cap_9bpp.pio:
--------------------------------------------------------------------------------
 1 | // Captures the LCD data, for prototype VGA testing.
 2 | // LCD data is from GP0 to GP9, so only 10 bits per pixel. (RRR GGGG BBB)
 3 | // This is done so the VGA output can also be 10 bits per pixel.
 4 | // Horizontal sync, vertical sync and pixel clock are GP10-GP12.
 5 | // Shifts in data MSB first.
 6 | 
 7 | // PINCTRL_IN_BASE = 0
 8 | .program lcd_cap_9bpp // Captures two 10-bit pin samples per pushed 32-bit word
 9 | 
10 | public entry_point:
11 | .wrap_target
12 | 	wait 0 gpio 12 // Wait for GP12 low (presumably the pixel clock - confirm pin order against the header comment)
13 | 	wait 0 gpio 10 // Wait for GP10 low (presumably hsync - confirm); only checked before the first sample of a pair
14 | 	in NULL, 6 // 6 zero bits of padding so each sample occupies 16 bits of the ISR
15 | 	in pins, 10 // First sample: 10 input pins starting at the IN base pin
16 | 	wait 1 gpio 12 // Wait out the rest of this GP12 cycle...
17 | 	wait 0 gpio 12 // ...and for GP12 to go low again
18 | 	in NULL, 6 // Padding for the second sample
19 | 	in pins, 10 // Second sample
20 | 	push // ISR now holds two padded samples; hand it to the RX FIFO
21 | 	wait 1 gpio 12 // Wait for GP12 high so the wrap starts on a fresh low phase
22 | .wrap
23 | 


--------------------------------------------------------------------------------
/src/pico_pll_example.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | 	An example I made for the RPi Pico PLL.
 3 | 	Non-functional, but just to provide some context.
 4 | */
 5 | 
 6 | #include <stdio.h>
 7 | #include "pico/stdlib.h"
 8 | 
 9 | int main()
10 | {
11 | 	stdio_init_all();
12 | 
13 | 	uint32_t sys_clock_khz = 294000; // Requested system clock, in kHz
14 | 
15 | 	uint vco_freq_out, post_div1_out, post_div2_out; // Written by check_sys_clock_khz() when it succeeds
16 | 
17 | 	bool clock_valid = check_sys_clock_khz(sys_clock_khz, &vco_freq_out, &post_div1_out, &post_div2_out);
18 | 	
19 | 	if(clock_valid==false)
20 | 	{
21 | 		printf("Oh no! This clock doesn't work.\n");
22 | 		return 1; // Same effect as exit(1) from main, without needing <stdlib.h> (which this file never includes)
23 | 	}
24 | 	else
25 | 	{
26 | 		printf("Hurray! Setting VCO to %u, div1 to %u, and div2 to %u\n", vco_freq_out, post_div1_out, post_div2_out); // %u because all three are unsigned
27 | 		
28 | 		set_sys_clock_pll((uint32_t)vco_freq_out, post_div1_out, post_div2_out);
29 | 	}
30 | 	while(1)
31 | 	{
32 | 		tight_loop_contents(); // SDK no-op for busy loops; replaces the undefined do_nothing() placeholder
33 | 	}
34 | }


--------------------------------------------------------------------------------
/src/pico_multicore_example.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | 	This code was not written by me.
 3 | 	It was taken out of the Raspberry Pi Pico SDK manual.
 4 | 	It is here to provide an example of how to use the second core.
 5 | 	I don't think it's functional because of some placeholder code.
 6 | */
 7 | 
 8 | #include <stdio.h>
 9 | #include "pico/stdlib.h"
10 | #include "pico/multicore.h"
11 | 
12 | #define FLAG_VALUE 123
13 | 
14 | void core1_entry()
15 | {
16 | 	// Handshake: announce ourselves to core 0, then block until it answers.
17 | 	multicore_fifo_push_blocking(FLAG_VALUE);
18 | 	const uint32_t reply = multicore_fifo_pop_blocking();
19 | 	if (reply == FLAG_VALUE) {
20 | 		printf("It's all gone well on core 1!");
21 | 	} else {
22 | 		printf("Hmm, that's not right on core 1!\n");
23 | 	}
24 | 	for (;;) {
25 | 		tight_loop_contents(); // Park core 1 forever once the handshake is reported
26 | 	}
27 | }
28 | 
29 | int main()
30 | {
31 | 	stdio_init_all();
32 | 	printf("Hello, multicore!\n");
33 | 
34 | 	// Start core 1, then block until its first FIFO word arrives.
35 | 	multicore_launch_core1(core1_entry);
36 | 	const uint32_t greeting = multicore_fifo_pop_blocking();
37 | 
38 | 	if (greeting == FLAG_VALUE) {
39 | 		// Echo the flag back so core 1 can complete its half of the handshake.
40 | 		multicore_fifo_push_blocking(FLAG_VALUE);
41 | 		printf("It's all gone well on core 0!");
42 | 	} else {
43 | 		printf("Hmm, that's not right on core 0!\n");
44 | 	}
45 | 
46 | 	return 0;
47 | }


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ## Gameboy to HDMI adapter using the RP2040 / Raspberry Pi Pico
 2 | Why does everyone keep telling me that FPGAs are better suited to do this kind of stuff? They're expensive to prototype and design with most of the time, difficult to program with open-source tools, and usually have high power consumption for a device that actually is capable enough for what would be normal\(ish\) battery-powered applications\.
 3 | 
 4 | ---
 5 | 
 6 | ### Purpose of this project
 7 | I think this project may be the largest/most complex project I've taken on aside from GBAudioPlayer, so I wouldn't be making this public without a good reason\! The purpose of this on the surface is just to convert the Gameboy's LCD signals to display via HDMI, pretty simple\. However, my motives for this project aren't as simple\. I want to:
 8 | - Prove to everyone that it CAN be done
 9 | - Challenge myself
10 | - Push the RP2040 to its limits
11 | - Learn how to program in ARM assembly and how to interface with more advanced hardware than the Gameboy \(I come from a background of programming in assembly for the Gameboy\)
12 | - Create the cheapest DIY HDMI output solution for the Gameboy in the wild
13 | 
14 | ---
15 | 
16 | ### Documentation
17 | Documentation about my journey and the info I gathered to make this project possible can be found in the `docs` folder\. Since this is a relatively new project, I will be updating it frequently\.
18 | 
19 | ---
20 | 
21 | ### When's it going to be finished?
22 | I don't know when I'm going to finish this project, but I predict that it's going to take a few months to get it fully working\. However, I will get some parts of it working one by one, and I might even upload progress clips to YouTube and put the links here\.
23 | 


--------------------------------------------------------------------------------
/src/vga_output_9bpp.pio:
--------------------------------------------------------------------------------
 1 | // Outputs the VGA data and sync signals.
 2 | // OSR: shift to right, autopull, threshold 32(?)
 3 | // The pixel clock is ~8.39MHz since the GB is upscaled 3x to 720x480 (1/3rd of the normal pixel clock.)
 4 | // Shifts out data MSB first.
 5 | // GP13-GP22 are the 10 bits of video data, and GP23-GP24 are horizontal and vertical sync.
 6 | // Ideally, there should be a good 16 clock cycles for each pixel, so the system clock should be 134.24MHz, or something like that.
 7 | 
 8 | .program vga_out_9bpp // NOTE(review): sketch only - line_end, pixel_clk and pulse_start are not defined in the visible source
 9 | 
10 | pixel_start:
11 | 	pull
12 | 	mov x, osr // Left image border
13 | 	pull
14 | 	mov y, osr // Active image area
15 | 	nop side 1 // NOTE(review): uses side-set, but this program has no .side_set directive
16 | left_border:
17 | 	nop [14] // 15 cycles here + 1 for the jmp = 16 cycles per border count
18 | 	jmp x-- left_border
19 | pixel_loop:
20 | 	pull // One FIFO word per pixel
21 | 	nop [10] // NOTE(review): loop totals 1+11+1+1+1 = 15 cycles, not the 16 quoted in the header - confirm
22 | 	out pins, 12 // 12 bits go to the OUT pins...
23 | 	out NULL, 4 // ...and 4 more are discarded, so 16 bits of each pulled word are consumed
24 | 	jmp y-- pixel_loop
25 | 	pull
26 | 	mov x, osr // Right image border (not referenced again before pixel_start reloads x)
27 | 	pull 
28 | 	mov y, osr // Count for the hsync_pulse_stall loop below
29 | 	nop side 0
30 | hsync_pulse_stall:
31 | 	nop [14] // Same 16-cycle cadence as left_border
32 | 	jmp y-- hsync_pulse_stall
33 | 	// Trigger IRQ for hsync pulse
34 | 	wait irq, line_end
35 | 	jmp pixel_start
36 | 
37 | // Here is the code for the VGA sync stuff.
38 | hsync_start:
39 | 	pull
40 | 	mov x, osr // Hsync pulse duration
41 | 	pull
42 | 	mov y, osr // Lines before vsync pulse
43 | 	set pins, 1
44 | 	// Trigger IRQ to start the line
45 | 	wait irq, line_end // Sent by the pixel output loop
46 | 	set pins, 0
47 | hsync_loop:
48 | 	wait irq, pixel_clk
49 | 	jmp x-- hsync_loop
50 | 	set pins, 1
51 | 	jmp y-- hsync_start // NOTE(review): hsync_start pulls a new y, so this countdown restarts every line - confirm intent
52 | 	// Send the vsync pulse interrupt
53 | 	jmp hsync_start
54 | //14 instructions
55 | 
56 | vsync_start:
57 | 	pull
58 | 	mov x, osr // Sync pulse duration
59 | 	mov y, x // Keep a copy in y so x can be reloaded after each pulse
60 | vsync_pulse:
61 | 	// Trigger IRQ to start hsync again
62 | 	wait irq, pulse_start
63 | vsync_loop:
64 | 	wait irq, line_end
65 | 	jmp x-- vsync_loop
66 | 	set pins, 1 // NOTE(review): no matching 'set pins, 0' appears in this section
67 | 	mov x, y // Restore the pulse duration for the next frame
68 | 	jmp vsync_pulse
69 | 


--------------------------------------------------------------------------------
/src/tmds_encode.S:
--------------------------------------------------------------------------------
 1 | // Edit: I'm probably going to use arm-none-eabi or something since I'll be integrating this with a little bit of C code to make things easier
 2 | // spoiler alert, this is the first time I've programmed in ARM assembly, but hopefully some of the things I've learned from GB ASM can carry over
 3 | 
 4 | // load and store multiple cycle count is 1+number of registers to load/store
 5 | 
 6 | .global SeparatePixel
 7 | .global GetTMDSDisparity
 8 | .global PackTMDS
 9 | 
10 | .code 16 // this is THUMB assembly so the width is 16 bits(?) (the processor only accepts THUMB instructions by default so idk)
11 | 
12 | 	// Mask is 0x1f<<1
13 | .macro SeparatePixel red, green, blue, mask, pixel // Splits \pixel into three masked fields; with mask = 0x1f<<1 each result is a 5-bit value pre-shifted left by 1
14 | 	and \red, \mask, [\pixel, lsl#1] // pixel bits 0-4 // NOTE(review): bracketed shifted operand reads as shorthand - confirm it assembles for the target
15 | 	and \green, \mask, [\pixel, lsr#4] // pixel bits 5-9
16 | 	and \blue, \mask, [\pixel, lsr#9] // pixel bits 10-14; the argument was written without its backslash, so it was never substituted
17 | .endm
18 | 	
19 | 	// If possible, the macros will just make it easier to do things in an unrolled loop
20 | 	// Err, more human-friendly
21 | 
22 | 	// Disparity is already shifted into the correct position when retrieving it from the TMDS LUT
23 | 
24 | .macro GetTMDSDisparity channel, disparity, chaddr, tmds_lut, tmds // Looks up one LUT entry and replaces \disparity with the entry's second word
25 | 	orr \chaddr, \channel, \disparity // Combine color value and current disparity into the entry offset
26 | 	add \chaddr, \chaddr, \tmds_lut // Turn the offset into an address inside the LUT
27 | 	ldmia \chaddr, {\tmds, \disparity} // 3 cycles; loads two consecutive words: TMDS data, then the new disparity
28 | .endm
29 | 
30 | 	// Takes the left and right amounts for packing the current tripled pixel.
31 | 	// Changes them by 2.
32 | .macro PackTMDS lshift, rshift, pix_0, pix_1 // Merges \pix_1 into \pix_0, then steps both shift amounts by 2
33 | 	lsr \pix_0, \rshift // Operand comma added; the assembler needs it to separate the two operands
34 | 	orr \pix_0, \pix_0, [\pix_1, lsl \lshift] // Comma added before the shift, matching SeparatePixel's operand style // NOTE(review): confirm this shifted-operand form assembles for the target
35 | 	add \rshift, \rshift, #2
36 | 	sub \lshift, \lshift, #2 // could be the wrong way right now
37 | 	// if it's 4 cycles total, 4*15=60 clock cycles for this macro to pack all values needed.
38 | .endm
39 | 
40 | 	// Output pixel is repeated 3 times, but disparity is still needed for it.
41 | 	// The same line is repeated 3 times, but the already encoded line is enough since disparity is reset at the end of a line
42 | 	// At 912x539, the CPU has up to 912*10/240 = 38 clock cycles to encode a color channel, or 608 clock cycles to pack
43 | 	// 16 of those values into 15 32-bit words.
44 | 	// 27360 total cycles for encoding all the pixels for a line at the most- a good way to encode a line is to have 3 buffers
45 | 	// of 240 color values (1 value per word) created with SeparatePixel, and then clobber those buffers with encoded TMDS.
46 | 	// (Perhaps do it 16 pixels at a time because of packing.)
47 | 	// Each entry of the LUT contains 3 properly-encoded repeated pixels for the input value and a 4-bit output disparity,
48 | 	// which is already shifted into position where it can be ORed with the next color value.
49 | 
50 | 


--------------------------------------------------------------------------------
/src/lcd_cap_15bpp_mux.pio:
--------------------------------------------------------------------------------
 1 | // Captures the LCD data.
 2 | // LCD data is from GP2 to GP9, multiplexed by 2 74AHC541 octal buffers; OE1 is GP0, and OE2 is GP1.
 3 | // This is done so there's enough GPIO left on the regular RPi Pico board to do audio input.
 4 | // Horizontal sync, vertical sync and pixel clock are GP10-GP12.
 5 | // IIRC vsync is active negative on GBC/GBA and positive on DMG. (DMG may just be inverted with hsync too?)
 6 | // Horizontal sync and pixel clock need to be low in order to shift in one pixel.
 7 | // Shifts in LCD data from the right, to the left.
 8 | // Buffer output enable is little-endian, but shifting is big-endian (lowest GPIO gets highest bit.)
 9 | // 1 Pi clock cycle lasts about 4 nanoseconds at 252MHz, and the '541 has a max enable/disable time of 14 nanoseconds*, 6 clock cycles or more of delay is safe enough.
10 | // *at room temperature (25C / 77F)
11 | // 2 + 8 + 3 pins = 13 pins for LCD capture (vs. 18 traditionally)
12 | // + 2 audio + 8 HDMI = 23 out of 26 pins
13 | // Which means 2 more could be used to connect a SNES controller to the console, and maybe one more could generate the system clock.
14 | 
15 | // So this is basically the BOM:
16 | // - 1x RPi Pico
17 | // - 1x Adafruit DVI/HDMI adapter board
18 | // - 1x Custom PCB
19 | // - 1x 50/40/32/34 pin FPC (or a combination of them, depending on the system used)
20 | // - 1x Adafruit FPC adapter board (will have pin headers for those on a prototype board or a board option?)
21 | // - 1x ribbon cable (to pair with connector)
22 | // - 2x 74AHC541 octal buffer
23 | // - 2x 74HC595 shift register
24 | // - 1x SNES controller connector
25 | // - 1x stereo audio jack
26 | // - 1x (trim?) potentiometer (for adjusting audio input volume)
27 | // - ?x resistors
28 | 
29 | // PINCTRL_IN_BASE = 0
30 | .program lcd_capture // Reads two multiplexed pixels per pass and pushes them as one 32-bit word
31 | 
32 | public entry_point:
33 | 	set pins, 0b00010 [delay] //some delay to let the pin states settle // NOTE(review): 'delay' is not defined in the visible source
34 | .wrap_target
35 | 	wait 0 gpio 12
36 | 	wait 0 gpio 10
37 | 
38 | //These 6 instructions have about 30 clock cycles to finish during the low edge of the pixel clock
39 | 	in pins, 32
40 | 	set pins, 0b00011 [6] //same as above; OE is active low, so I set it high to disable it for a little bit
41 | 	in NULL, 2
42 | 	mov x, isr
43 | 	set pins, 0b00001 [8] //again
44 | 	in pins, 32
45 | 
46 | 	in NULL, 2
47 | 	mov y, isr
48 | 	in x, 8
49 | 	in y, 7
50 | 	mov x, isr // x now carries the first assembled pixel
51 | 	wait 1 gpio 12
52 | 	wait 0 gpio 12
53 | 
54 | //These 6 instructions have about 30 clock cycles to finish during the low edge of the pixel clock
55 | 	in pins, 32
56 | 	set pins, 0b00011 [6]
57 | 	in NULL, 2
58 | 	mov y, isr
59 | 	set pins, 0b00010 [8] // Back to the same SET value the program starts with
60 | 	in pins, 8
61 | 
62 | 	in y, 7
63 | 	in NULL, 17
64 | 	mov y, isr
65 | 	in x, 15 // Output word: 15 bits from x...
66 | 	in NULL, 1 // ...one zero separator bit...
67 | 	in y, 15 // ...then 15 bits from y
68 | 	push
69 | 	wait 1 gpio 12 // Stray comma after the polarity removed so this matches every other wait in the program
70 | .wrap
71 | 


--------------------------------------------------------------------------------
/src/out_dma_manager.S:
--------------------------------------------------------------------------------
 1 | /*
 2 | 	Output DMA manager
 3 | 	Monitors the state of output DMA channels and updates the information of the chain DMA channels.
 4 | 
 5 | 	These 3 chain DMA channels (channels 3, 4 and 5) have a ring size of 16 bytes (4 words) and transfer 4 words in order to reset the main output DMA channels.
 6 | 	The 3 channels that chain to them (channels 0, 1 and 2) transfer 911 words before they stop and chain to the reset channels.
 7 | 	Since the output resolution is at max a 3x scale of the Gameboy's resolution (720x480,) each input line is repeated three times, at which point this program switches the line buffer address the chain DMA feeds to the main DMA when it resets.
 8 | 	This is because on the output, there is a double line buffer, each line being a fully encoded TMDS line. This is necessary so that the CPU has enough time to construct a whole TMDS line, which involves converting pixels from the double frame buffer to TMDS format, repeating them, and adding sync words at the end of the line. In the future, if audio over HDMI is added, the CPU also needs enough time to grab encoded audio from the audio buffer and put that into the signal.
 9 | 	The resolution is 912 by 539 pixels, including the blanking areas.
10 | 
11 | 	DMA channel 0 DREQ: DREQ_PIO0_TX0 (0)
12 | 	DMA channel 1 DREQ: DREQ_PIO0_TX1 (1)
13 | 	DMA channel 2 DREQ: DREQ_PIO0_TX2 (2)
14 | 
15 | 	DMA base address: 0x50000000
16 | 	All DMAs are set to IRQ_QUIET, which I think sets bit 21 of CHx_CTRL_TRIG.
17 | 	DATA_SIZE is always a full word, so the value is 0x2.
18 | 
19 | 	Offset - Name - Value
20 | 	0x000 CH0_READ_ADDR = Channel 0 TMDS buffer
21 | 	0x004 CH0_WRITE_ADDR = PIO0 TXF0 (SM0 TX FIFO; the channel is paced by DREQ_PIO0_TX0)
22 | 	0x008 CH0_TRANS_CNT = 912
23 | 	0x00C CH0_CTRL_TRIG
24 | 	TREQ_SEL = DREQ_PIO0_TX0 (0b000000)
25 | 	CHAIN_TO = Channel 3 (0b0011)
26 | 	INCR_WRITE = 0
27 | 	INCR_READ = 1
28 | 
29 | 	0x040 CH1_READ_ADDR = Channel 1 TMDS buffer
30 | 	0x044 CH1_WRITE_ADDR = PIO0 TXF1 (SM1 TX FIFO; the channel is paced by DREQ_PIO0_TX1)
31 | 	0x048 CH1_TRANS_CNT = 912
32 | 	0x04C CH1_CTRL_TRIG
33 | 	TREQ_SEL = DREQ_PIO0_TX1 (0b000001)
34 | 	CHAIN_TO = Channel 4 (0b0100)
35 | 	INCR_WRITE = 0
36 | 	INCR_READ = 1
37 | 
38 | 	0x080 CH2_READ_ADDR = Channel 2 TMDS buffer
39 | 	0x084 CH2_WRITE_ADDR = PIO0 TXF2 (SM2 TX FIFO; the channel is paced by DREQ_PIO0_TX2)
40 | 	0x088 CH2_TRANS_CNT = 912
41 | 	0x08C CH2_CTRL_TRIG
42 | 	TREQ_SEL = DREQ_PIO0_TX2 (0b000010)
43 | 	CHAIN_TO = Channel 5 (0b0101)
44 | 	INCR_WRITE = 0
45 | 	INCR_READ = 1
46 | 
47 | 	Not sure if I'm going to use these:
48 | 
49 | 	0x0C0 CH3_READ_ADDR
50 | 	0x0C4 CH3_WRITE_ADDR = Channel 0 base (0x000)
51 | 	0x0C8 CH3_TRANS_CNT = 4
52 | 	0x0CC CH3_CTRL_TRIG
53 | 	TREQ_SEL = 0x3f (unpaced transfer)
54 | 	CHAIN_TO = Channel 0 (0b0000)
55 | 	RING_SEL = 1
56 | 	RING_SIZE = 16 bytes (0x4)
57 | 	INCR_WRITE = 1
58 | 	INCR_READ = 1
59 | 
60 | 	0x100 CH4_READ_ADDR
61 | 	0x104 CH4_WRITE_ADDR = Channel 1 base (0x040)
62 | 	0x108 CH4_TRANS_CNT = 4
63 | 	0x10C CH4_CTRL_TRIG
64 | 	TREQ_SEL = 0x3f (unpaced transfer)
65 | 	CHAIN_TO = Channel 1 (0b0001)
66 | 	RING_SEL = 1
67 | 	RING_SIZE = 16 bytes (0x4)
68 | 	INCR_WRITE = 1
69 | 	INCR_READ = 1
70 | 
71 | 	0x140 CH5_READ_ADDR
72 | 	0x144 CH5_WRITE_ADDR = Channel 2 base (0x080)
73 | 	0x148 CH5_TRANS_CNT = 4
74 | 	0x14C CH5_CTRL_TRIG
75 | 	TREQ_SEL = 0x3f (unpaced transfer)
76 | 	CHAIN_TO = Channel 2 (0b0010)
77 | 	RING_SEL = 1
78 | 	RING_SIZE = 16 bytes (0x4)
79 | 	INCR_WRITE = 1
80 | 	INCR_READ = 1
81 | 
82 | */
83 | 


--------------------------------------------------------------------------------
/scripts/tmds_util.h:
--------------------------------------------------------------------------------
  1 | /*
  2 | 	tmds_util.h
  3 | 
  4 | 	Various definitions/declarations of values, structs, and function prototypes for tmds_util.c to make things less messy.
  5 | */
  6 | 
  7 | #define H_ACTIVE 720
  8 | #define H_FRONT 32
  9 | #define H_PULSE 64
 10 | #define H_BACK 96
 11 | #define H_TOTAL 912
 12 | 
 13 | #define V_ACTIVE 480
 14 | #define V_FRONT 13
 15 | #define V_PULSE 8
 16 | #define V_BACK 38
 17 | #define V_TOTAL 539
 18 | 
 19 | #define AVI_PACKET_TYPE 0x82
 20 | #define HDMI_VERSION 0x02
 21 | #define AVI_PACKET_LENGTH 13 // 0x0D
 22 | #define AVI_HEADER_CHECKSUM 0x91
 23 | 
 24 | // The VIC bits of the AVI InfoFrame data byte 4 are either 0x02 or 0x03
 25 | // because the active video is technically 720x480p 60Hz.
 26 | // All other bytes should be set to zero.
 27 | // Therefore, if the VIC bits are actually used, the checksum should be
 28 | // 0x02 or 0x03.
 29 | 
 30 | struct tmds_pixel_t // Working state for encoding one color-channel value
 31 | {
 32 | 	uint8_t color_data_5b; // presumably the 5-bit source color - TODO confirm against tmds_util.c
 33 | 	uint8_t color_data; // presumably the 8-bit color (see depth_convert) - TODO confirm
 34 | 	uint16_t tmds_data; // Encoded TMDS word for this value
 35 | 	int disparity; // Signed running disparity, updated by tmds_calc_disparity()
 36 | };
 37 | 
 38 | struct sync_buffer_t // One 16-bit buffer pointer per TMDS channel for each of four blanking-line variants
 39 | {
 40 | 	// Normal hblank
 41 | 	uint16_t *hblank_ch0;
 42 | 	uint16_t *hblank_ch1;
 43 | 	uint16_t *hblank_ch2;
 44 | 	// Entering and most of vblank: no video preamble or guard bands included, falling edge of vsync
 45 | 	uint16_t *vblank_en_ch0;
 46 | 	uint16_t *vblank_en_ch1;
 47 | 	uint16_t *vblank_en_ch2;
 48 | 	// Vsync: no video preamble or guard bands, with the addition of vsync pulse
 49 | 	uint16_t *vblank_syn_ch0;
 50 | 	uint16_t *vblank_syn_ch1;
 51 | 	uint16_t *vblank_syn_ch2;
 52 | 	// Exiting vblank: the last hblank of the frame is just a normal hblank, except with the addition of rising edge of vsync
 53 | 	// (or first hblank before active video data)
 54 | 	uint16_t *vblank_ex_ch0;
 55 | 	uint16_t *vblank_ex_ch1;
 56 | 	uint16_t *vblank_ex_ch2;
 57 | };
 58 | 
 59 | struct sync_buffer_32_t // Same twelve members as sync_buffer_t, but pointing at 32-bit words
 60 | {
 61 | 	uint32_t *hblank_ch0; // Normal hblank
 62 | 	uint32_t *hblank_ch1;
 63 | 	uint32_t *hblank_ch2;
 64 | 
 65 | 	uint32_t *vblank_en_ch0; // Entering vblank
 66 | 	uint32_t *vblank_en_ch1;
 67 | 	uint32_t *vblank_en_ch2;
 68 | 
 69 | 	uint32_t *vblank_syn_ch0; // Vsync pulse lines
 70 | 	uint32_t *vblank_syn_ch1;
 71 | 	uint32_t *vblank_syn_ch2;
 72 | 
 73 | 	uint32_t *vblank_ex_ch0; // Exiting vblank
 74 | 	uint32_t *vblank_ex_ch1;
 75 | 	uint32_t *vblank_ex_ch2;
 76 | };
 77 | 
 78 | struct infoframe_header_t // InfoFrame packet header bytes plus their TERC4 encodings
 79 | {
 80 | 	uint8_t packet_type; // e.g. AVI_PACKET_TYPE
 81 | 	uint8_t version; // e.g. HDMI_VERSION
 82 | 	uint8_t packet_length; // e.g. AVI_PACKET_LENGTH
 83 | 	uint8_t header_checksum; // e.g. AVI_HEADER_CHECKSUM
 84 | 	uint16_t *terc4_r_header; //32*sizeof(uint16_t); unpacked, one TERC4 word per element
 85 | 	uint32_t *terc4_en_header; //10*sizeof(uint32_t); packed form of the same data
 86 | 	// 16 TMDS words per 5 32-bit words; each packet is 32 TMDS words long, or 10 32-bit words
 87 | 	// OR with sync_masks[1] for normal hsync and sync_masks[0] for hsync during vsync (oops)
 88 | };
 89 | 
 90 | struct infoframe_packet_t // InfoFrame packet body plus its per-channel TERC4 encodings
 91 | {
 92 | 	uint8_t packet_checksum;
 93 | 	uint8_t *packet_data; // malloc(31)
 94 | 	uint16_t *terc4_r_ch1; // Channel 1 gets lower nibble, malloc(32*sizeof(uint16_t))
 95 | 	uint16_t *terc4_r_ch2; // Channel 2 gets higher nibble
 96 | 	uint32_t *terc4_en_ch1; // r = unpacked data, en = packed data malloc(10*sizeof(uint32_t)) - ten 32-bit words, as for terc4_en_header
 97 | 	uint32_t *terc4_en_ch2;
 98 | };
 99 | 
100 | // Function header prototypes. NOTE(review): this header uses uint8_t/uint16_t/uint32_t but includes nothing itself, so <stdint.h> must be included before it.
101 | void free_sync_buffers(struct sync_buffer_t *sync_buffer);
102 | void free_sync_buffers_32(struct sync_buffer_32_t *sync_buffer);
103 | void allocate_sync_buffer(uint16_t **buffer); // Takes the address of the pointer, presumably so it can store the new allocation - TODO confirm
104 | void allocate_sync_buffer_32(uint32_t **buffer);
105 | void create_sync_buffers();
106 | void create_sync_buffers_nodat();
107 | 
108 | void pack_buffer_single(uint16_t *in_buffer, uint32_t *out_buffer, int buffer_size); // presumably packs 16-bit TMDS words into 32-bit words - TODO confirm
109 | void create_sync_files(char *name, struct sync_buffer_t *sync_buffer);
110 | 
111 | uint16_t tmds_xor(uint8_t color_data);
112 | uint16_t tmds_xnor(uint8_t color_data);
113 | int ones_count(uint8_t color_data);
114 | void tmds_calc_disparity(struct tmds_pixel_t *tmds_pixel); // Reads and updates the pixel in place
115 | void tmds_pixel_repeat(uint32_t *lut_buf, struct tmds_pixel_t *tmds_pixel);
116 | 
117 | uint8_t depth_convert(uint8_t c_in);
118 | void create_avi_infoframe();
119 | 
120 | void create_solid_line(char *name, struct tmds_pixel_t *pixel);
121 | 


--------------------------------------------------------------------------------
/scripts/tmds_util_colorcor.c:
--------------------------------------------------------------------------------
  1 | /*
  2 | 	
  3 | 	OLD, OUTDATED PROGRAM
  4 | 	ONLY KEPT FOR POTENTIAL FUTURE REFERENCE/ARCHIVING PURPOSES
  5 | 
  6 | 	tmds_util.c
  7 | 
  8 | 	This program generates the TMDS output data/lookup tables for the Raspberry Pi Pico/RP2040.
  9 | 	And various other utilities.
 10 | 	It also generates color correction lookup tables for "emulating" the GBC and GBA LCD colors.
 11 | 	They may go unused in the final device because of timing constraints.
 12 | 
 13 | 	TO DO:
 14 | 	-Add TMDS control word LUT/generation
 15 | 	-Add TMDS audio LUT generation
 16 | 
 17 | 	Traditionally:
 18 | 	-Separate color channels from shared word into different words (1 pixel only)
 19 | 	-Get results individually
 20 | 	-Convert into TMDS individually and put into different buffers one at a time
 21 | 
 22 | 	Variables needed per channel: raw color data, last TMDS data, TMDS data buffer address
 23 | 	Universal variables needed: raw pixel data, framebuffer address
 24 | 	Total predicted registers used for video encoding: 11
 25 | 	pix = fbuffer[fbaddr++];
 26 | 
 27 | 	raw_r = pix&0xff, pix>>=8;
 28 | 	raw_r |= (last_r&0x1f000000)>>16; //2 instructions?
 29 | 	last_r = tmds_lut[raw_r];
 30 | 	rbuffer[raddr++] = last_r; //2 instructions?
 31 | 
 32 | 	raw_g = pix&0xff, pix>>=8;
 33 | 	raw_g |= (last_g&0x1f000000)>>16;
 34 | 	last_g = tmds_lut[raw_g];
 35 | 	gbuffer[gaddr++] = last_g;
 36 | 
 37 | 	raw_b = pix;
 38 | 	raw_b |= (last_b&0x1f000000)>>16;
 39 | 	last_b = tmds_lut[raw_b];
 40 | 	bbuffer[baddr++] = last_b;
 41 | 
 42 | 	6 instructions per channel = 18 instructions per pixel
 43 | 
 44 | 	Or do this (assuming 24bpp input):
 45 | 	pix = fbuffer[fbaddr++]; //This is done for however wide the framebuffer is horizontally, then the line buffer is used
 46 | 	raw_cc = pix&0xff, pix>>=8;
 47 | 	lbuffer[lbaddr++] = pix; //This is repeated for horizontal width and then reset
 48 | 	raw_cc |= (cc_tmds&0x1f000000)>>16; //cc_tmds is init'd to zero
 49 | 	cc_tmds = tmds_lut[raw_cc];
 50 | 	tlbuffer[tladdr++] = cc_tmds;
 51 | 
 52 | 	On the other 2 color channels:
 53 | 	lbaddr = 0;
 54 | 	loop point:
 55 | 	pix = lbuffer[lbaddr++];
 56 | 	raw_cc = pix&0xff, pix>>=8;
 57 | 	lbuffer[lbaddr++] = pix;
 58 | 	raw_cc |= (cc_tmds&0x1f000000)>>16;
 59 | 	cc_tmds = tmds_lut[raw_cc];
 60 | 	tlbuffer[tladdr++] = cc_tmds;
 61 | */
 62 | #include <stdio.h>
 63 | #include <stdlib.h>
 64 | #include <stdbool.h>
 65 | #include <string.h>
 66 | #include <stdint.h>
 67 | #include <math.h>
 68 | #include <unistd.h>
 69 | 
 70 | struct tmds_pixel_t // Input/output state for tmds_calc_disparity()
 71 | {
 72 | 	uint8_t color_data; // Color value to encode (input)
 73 | 	uint16_t tmds_data; // Encoded TMDS word (written by tmds_calc_disparity)
 74 | 	int disparity; // Signed running disparity: read as the starting value, overwritten with the result
 75 | };
 76 | 
 77 | // THIS IS OUTDATED! THE PIOS CAN PERFORM THIS ON THE FLY, SO THIS IS NOT NECESSARY!!! Packs a 10-bit TMDS word as ten N/P bit pairs plus the biased disparity.
 78 | uint32_t tmds_interleave(uint16_t tmds_data, int disparity)
 79 | {
 80 | 	uint8_t unsd = ((uint8_t)(disparity+16))&0x1f; // Bias the signed disparity by 16 and keep 5 bits
 81 | 	uint32_t reg_word = (uint32_t)tmds_data;
 82 | 	uint32_t inv_word = (~reg_word)<<1; // Inverted copy, pre-shifted so its current bit sits at bit 1
 83 | 	uint32_t out_word = 0;
 84 | 	for(int i=0; i<10; i++)
 85 | 	{
 86 | 		out_word |= (((reg_word&0x01)|(inv_word&0x02))<<20); // Insert the next P (bit 0) / N (bit 1) pair at bits 20-21
 87 | 		reg_word = reg_word>>1;
 88 | 		inv_word = inv_word>>1;
 89 | 		out_word = out_word>>2; // After ten rounds the first pair has moved down to bits 0-1
 90 | 	}
 91 | 	out_word |= (uint32_t)unsd<<24;
 92 | 	//format = nnnDDDDD-nnnnNPNP-NPNPNPNP-NPNPNPNP (little endian)
 93 | 	//n = not used
 94 | 	//D = disparity
 95 | 	//N = negative TMDS data
 96 | 	//P = positive TMDS data
 97 | 	return out_word; //Disparity is stored in the top byte. (The return statement was missing, so the result was indeterminate.)
 98 | }
 99 | 
100 | // little endian
101 | // Input: color value, right aligned (all 8 bits are processed).
102 | // Output: bits 0-7 hold the XOR chain q[0]=d[0], q[n]=q[n-1]^d[n]; bit 8 is always set.
103 | uint16_t tmds_xor(uint8_t color_data)
104 | {
105 | 	// Seed the chain with the lowest input bit.
106 | 	uint16_t encoded = color_data&0x01;
107 | 	for(int bit=1; bit<8; bit++)
108 | 	{
109 | 		// Each output bit is the previous output bit XORed with this input bit.
110 | 		uint16_t prev_q = (encoded>>(bit-1))&0x01;
111 | 		uint16_t data_bit = (color_data>>bit)&0x01;
112 | 		encoded |= (uint16_t)((prev_q^data_bit)<<bit);
113 | 	}
114 | 	// Bit 8 tags the word as XOR-encoded.
115 | 	// Nothing above bit 8 is ever set.
116 | 	encoded |= 0x100;
117 | 	return encoded;
118 | }
119 | 
120 | uint16_t tmds_xnor(uint8_t color_data)
121 | {
122 | 	// Builds the XNOR chain q[0]=d[0], q[n]=~(q[n-1]^d[n]) over bits 0-7.
123 | 	// Returns a 9-bit word whose bit 8 is 0, the marker for an XNOR-encoded word.
124 | 	uint16_t encoded = color_data&0x01;
125 | 	for(int bit=1; bit<8; bit++)
126 | 	{
127 | 		unsigned prev_q = (encoded>>(bit-1))&0x01u;
128 | 		unsigned data_bit = (color_data>>bit)&0x01u;
129 | 		// Work on unsigned 0/1 values: the old form left-shifted a negative int (~x), which is undefined behavior.
130 | 		encoded |= (uint16_t)(((prev_q^data_bit)^0x01u)<<bit);
131 | 	}
132 | 	// Bit 8 must stay clear; the old loop ran one step too far and left XNOR(q[7],0) there.
133 | 	encoded &= 0x0ff;
134 | 	return encoded;
135 | }
136 | 
137 | // Return the signed value of the difference between 1s and 0s.
138 | // Positive = more 1s, negative = more 0s.
139 | /*
140 | int bit_diff_s(uint16_t tmds_data)
141 | {
142 | 	int ones_cnt = (int)ones_count(tmds_data);
143 | 	int zeros_cnt = (int)zeros_count(tmds_data);
144 | 	int difference = ones_cnt-zeros_cnt;
145 | 	return difference;
146 | }
147 | */
148 | 
149 | uint8_t ones_count(uint16_t color_data)
150 | {
151 | 	// Counts the set bits among bits 0..8 of the argument.
152 | 	// Anything above bit 8 is ignored.
153 | 	uint8_t set_bits = 0;
154 | 	for(int bit=0; bit<9; bit++)
155 | 	{
156 | 		// Adds 0 or 1 depending on the bit under test.
157 | 		set_bits += (uint8_t)((color_data>>bit)&0x01);
158 | 	}
159 | 	return set_bits;
160 | }
161 | 
162 | uint8_t bit_diff(uint16_t tmds_data)
163 | {
164 | 	// Absolute difference between ones and zeros in the low 9 bits: |ones-(9-ones)| = |2*ones-9|.
165 | 	int set_bits = (int)ones_count(tmds_data);
166 | 	int imbalance = (2*set_bits)-9;
167 | 	return (uint8_t)(imbalance<0 ? -imbalance : imbalance);
168 | }
169 | 
170 | //disparity is a 5-bit signed integer converted to a 5-bit unsigned integer
171 | //takes a color channel value and interleaves it into a TMDS word with disparity
172 | //If the signed value is 0, then assume the output is how much disparity to add for that color value.
173 | //Which means that disparity multiplied by 3 is the result of encoding a pixel.
174 | //If it isn't 0, then the added disparity is the difference between the old and new values.
175 | //Current LUT has 2 words per entry: one for the 3 TMDS words it outputs for the same pixel, and one for the resulting disparity.
176 | void tmds_calc_disparity(struct tmds_pixel_t *tmds_pixel)
177 | {
178 | 	//for the LUT, there are 2^(8+5) -> 8192 entries; 32 for each color channel value
179 | 	//since disparity is a signed 5-bit integer from -16 to 15
180 | 	int this_disparity = tmds_pixel->disparity;
181 | 	uint8_t ones_cnt = ones_count(tmds_pixel->color_data);
182 | 	uint16_t tmds_word = 0;
183 | 	uint8_t difference = 0;
184 | 	// XNOR when ones dominate, or on a 4/4 tie with bit 0 clear (the DVI 1.0 encoder rule).
185 | 	if(ones_cnt>4 || (ones_cnt==4 && !((tmds_pixel->color_data)&0x01)))
186 | 	{
187 | 		tmds_word = tmds_xnor(tmds_pixel->color_data);
188 | 	}
189 | 	// Everything else uses XOR. Previously a 4/4 tie with bit 0 set matched neither test and left tmds_word at 0.
190 | 	else
191 | 	{
192 | 		tmds_word = tmds_xor(tmds_pixel->color_data);
193 | 	}
194 | 	difference = bit_diff(tmds_word); // |ones-zeros| over the low 9 bits of the intermediate word
195 | 	if(ones_cnt==4 || !(tmds_pixel->disparity))
196 | 	{
197 | 		if(tmds_word&0x100)
198 | 		{
199 | 			this_disparity = this_disparity+(int)difference;
200 | 		}
201 | 		else
202 | 		{
203 | 			tmds_word = (~tmds_word)&0x3ff; // NOTE(review): 0x3ff also flips bit 8, unlike the 0x2ff masks below - confirm intended
204 | 			this_disparity = this_disparity-(int)difference;
205 | 		}
206 | 	}
207 | 	else
208 | 	{
209 | 		if(((tmds_pixel->disparity)>0 && ones_cnt>4) || ((tmds_pixel->disparity)<0 && ones_cnt<4))
210 | 		{
211 | 			if(tmds_word&0x100)
212 | 			{
213 | 				tmds_word = ((~tmds_word)&0x2ff)|(tmds_word&0x100); // Invert bits 0-7, set bit 9, keep bit 8
214 | 				this_disparity = (this_disparity-(int)difference)+2;
215 | 			}
216 | 			else
217 | 			{
218 | 				tmds_word = ((~tmds_word)&0x2ff);
219 | 				this_disparity = this_disparity-(int)difference;
220 | 			}
221 | 		}
222 | 		else
223 | 		{
224 | 			if(tmds_word&0x100)
225 | 			{
226 | 				this_disparity = this_disparity+(int)difference;
227 | 			}
228 | 			else
229 | 			{
230 | 				this_disparity = (this_disparity+(int)difference)-2;
231 | 			}
232 | 		}
233 | 	}
234 | 	tmds_pixel->disparity = this_disparity; // Results are written back into the caller's struct
235 | 	tmds_pixel->tmds_data = tmds_word;
236 | }
237 | 
238 | // The disparity should be pre-initialized, in a loop. The LUT is 32*32*2 words long, or 8192 bytes.
239 | void tmds_pixel_repeat(uint32_t *lut_buf, struct tmds_pixel_t *tmds_pixel)
240 | {
241 | 	uint32_t *entry = &lut_buf[((tmds_pixel->color_data)<<1)|(((tmds_pixel->disparity)+16)<<6)]; // Located once from the INPUT disparity; the old code re-indexed after every encode and scattered the three words
242 | 	entry[0] = 0;
243 | 	for(int rep=0; rep<3; rep++)
244 | 	{
245 | 		tmds_calc_disparity(tmds_pixel); // Encodes the same color again, carrying the running disparity forward
246 | 		entry[0] |= (uint32_t)(tmds_pixel->tmds_data)<<(10*rep); // Three 10-bit words at bits 0, 10 and 20
247 | 	}
248 | 	entry[1] = (tmds_pixel->disparity)+16; // Second word of the entry: biased disparity after the third repeat
249 | }
250 | 
251 | //Color correction algorithms to use for generating the LUT: https://near.sh/articles/video/color-emulation
252 | //Input colors are (I assume) 0 to 31
253 | //Output gamma is 2.2
254 | //LCD gamma is 4.0
255 | //1/gamma is ~0.455
256 | //The end multiplied value is ~59683.661
257 | //Simplified:
258 | //-Colors are divided by 31 and raised to the power of 4
259 | //-Color bleed is calculated(?)
260 | //-Value is divided by 255
261 | //-That value then gets something a bit less than its square root calculated
262 | //-Then finally gets multiplied by an end value
263 | 
264 | //This may require a whole lookup table for color correction
265 | //Which means 32768 * 32-bit words = 131072 bytes per LUT
266 | //Or 3 * 32 * 1024 bytes for all channels, 98304 bytes total
267 | //Which the RP2040 doesn't have if a single buffer is used.
268 | /*
269 | void gba_lcd_correct(uint8_t r_in, uint8_t g_in, uint8_t b_in, uint8_t *r_out, uint8_t *g_out, uint8_t *b_out)
270 | {
271 | 	double lcdGamma = 4.0, outGamma = 2.2;
272 | 	double lb = pow(((double)b_in / 31.0), lcdGamma);
273 | 	double lg = pow(((double)g_in / 31.0), lcdGamma);
274 | 	double lr = pow(((double)r_in / 31.0), lcdGamma);
275 | 	*r_out = (uint8_t)pow((((50*lg)+(255*lr))/255), (1/outGamma))*((0xffff*255)/280);
276 | 	*g_out = (uint8_t)pow((((30*lb)+(230*lg)+(10*lr))/255), (1/outGamma))*((0xffff*255)/280);
277 | 	*b_out = (uint8_t)pow((((220*lb)+(10*lg)+(50*lr))/255), (1/outGamma))*((0xffff*255)/280);
278 | }
279 | 
280 | void gbc_lcd_correct(uint8_t r_in, uint8_t g_in, uint8_t b_in, uint8_t *r_out, uint8_t *g_out, uint8_t *b_out)
281 | {
282 | 	int R = ((int)r_in*26 + (int)g_in*4 + (int)b_in*2);
283 | 	int G = ((int)g_in*24 + (int)b_in*8);
284 | 	int B = ((int)r_in*6 + (int)g_in*4 + (int)b_in*22);
285 | 	R = min(960, R)>>2;
286 | 	G = min(960, G)>>2;
287 | 	B = min(960, B)>>2;
288 | 	*r_out = (uint8_t)R;
289 | 	*g_out = (uint8_t)G;
290 | 	*b_out = (uint8_t)B;
291 | }
292 | */
293 | 
// Expands a GBC/GBA 5-bit color channel to 8 bits with no color correction.
// The five input bits become the top five output bits, and the top three input
// bits are repeated in the bottom three output bits, so 0 maps to 0x00 and 31 to 0xff.
uint8_t depth_convert(uint8_t c_in)
{
	const unsigned int upper_bits = (unsigned int)c_in<<3;
	const unsigned int lower_bits = ((unsigned int)c_in>>2)&0x07;
	// The narrowing cast keeps only the low eight bits of the combined value.
	return (uint8_t)(upper_bits|lower_bits);
}
300 | 
301 | int main()
302 | {
303 |     /*
304 |     uint8_t color_lut_gba[3][32][1024]; //this should probably be malloc
305 |     uint8_t color_lut_gbc[3][32][1024];
306 |     uint16_t lut_addr_r, lut_addr_g, lut_addr_b;
307 |     uint8_t r_a, g_a, b_a, r_c, g_c, b_c;
308 |     for(uint8_t r=0; r<32; r++)
309 |     {
310 |     	for(uint8_t g=0; g<32; g++)
311 |     	{
312 |     		for(uint8_t b=0; b<32; b++)
313 |     		{
314 |     			gba_lcd_correct(r, g, b, &r_a, &g_a, &b_a);
315 |     			gbc_lcd_correct(r, g, b, &r_c, &g_c, &b_c);
316 |     			lut_addr_r = g_a<<5|b_a;
317 |     			lut_addr_g = b_a<<5|r_a;
318 |     			lut_addr b = r_a<<5|g_a;
319 |     			color_lut_gba[0][r][lut_addr_r] = r_a;
320 |     			color_lut_gba[1][g][lut_addr_g] = g_a;
321 |     			color_lut_gba[2][b][lut_addr_b] = b_a;
322 |     			lut_addr_r = g_c<<5|b_c;
323 |     			lut_addr_g = b_c<<5|r_c;
324 |     			lut_addr b = r_c<<5|g_c;
325 |     			color_lut_gbc[0][r][lut_addr_r] = r_c;
326 |     			color_lut_gbc[1][g][lut_addr_g] = g_c;
327 |     			color_lut_gbc[2][b][lut_addr_b] = b_c;
328 |     		}
329 |     	}
330 |     }
331 |     //Since the color correction could be applied, there needs to be 32*256 entries of 32-bit words
332 |     uint32_t tmds_lut[32][256];
333 |     for(uint16_t i=0; i<256; i++)
334 |     {
335 |     	for(int j=-16; j<16; j++)
336 |     	{
337 |     		tmds_lut[j+16][i] = tmds_calc_disparity(i, j);
338 |     	}
339 |     }
340 | 	*/
341 | 	//Try to find out which combinations of disparities and data lead to it zeroing out
342 | 	//with 2 of the same TMDS words and one different word.
343 |     uint32_t tmds_word = 0;
344 |     int disp_0 = 0, disp_1 = 0;
345 |     uint16_t c_value = 0;
346 |     for(uint16_t i=0; i<32; i++)
347 |     {
348 |     	c_value = (uint16_t)depth_convert((uint8_t)i);
349 |     	tmds_word = tmds_calc_disparity(c_value, 0);
350 |     	disp_0 = (((int)((tmds_word&0x1f000000)>>24))-16)*2;
351 |     	tmds_word = tmds_calc_disparity(c_value, disp_0);
352 |     	disp_1 = ((int)((tmds_word&0x1f000000)>>24))-16;
353 |     	printf("Disparity for value %2x: %2d initial, %2d final\n", c_value, disp_0, disp_1);
354 |     }
355 | 
356 |     return 0;
357 | }


--------------------------------------------------------------------------------
/docs/DOCUMENTATION.md:
--------------------------------------------------------------------------------
  1 | ## Documentation used and produced for this project
  2 | This serves as a place where I put \(most of\) the various info that I gathered and compiled in order to work on and complete this project\. It includes some PDFs that I referenced\.
  3 | 
  4 | First, I provide the information on the standards, and then about the device itself\!
  5 | 
  6 | ---
  7 | 
  8 | ### Color Correction
  9 | I got my info on color correction from [this site](https://near.sh/articles/video/color-emulation)\. It has information on how to translate RGB555 colors to RGB888 colors directly, how to emulate the GBA LCD colors, and how to emulate the GBC LCD colors\. I have integrated this information into my `tmds_util_colorcor.c` program, which is purely for archival purposes at the moment\. I can't guarantee whether or not it will actually work, but if it can, the final product will have optional color correction when uploading the firmware to the board\. \(At the moment, I don't think that there will be enough time to perform color correction\.\)
 10 | 
 11 | ---
 12 | 
 13 | ### Helpful info I missed initially
 14 | One of the things I missed during my research was BCH encoding, which is done to data transmitted during the data island periods before being encoded to TERC4 \(all of which I'll talk about later\.\) Someone already beat me to the punch with sending audio over HDMI with the Raspberry Pi Pico, but that just means I can benefit from the work they did\! [This repository](https://github.com/shuichitakano/pico_lib) has most of the info I missed in the `dvi` folder\. Specifically, `data_packet.cpp`\.
 15 | 
 16 | ---
 17 | 
 18 | ### The Difference Between VGA and DVI
 19 | DVI is essentially a digital version of VGA with support for higher resolutions because of how it is encoded to support higher pixel clocks, as well as some changes to how syncing works in order to work with that transmission method\. The difference specifically being that 8\-bit color values are encoded to 10 bits, and a disparity variable is used to keep track of the difference between the number of zeroes and ones transmitted so that the DC offset of the signal can be kept to a minimum\. In addition to that, there are control signals/values that are transmitted at the end of the visible data along with a data enable signal so the sink knows it's time to listen for the sync signals\. The sync signals also have tighter timing requirements; sync signal transitions need to occur on the same pixel clock\. More details can be found within `tmds_util.c`\.
 20 | 
 21 | Below is a small table of how the control signals are encoded\. The control signals are used on all 3 TMDS \(short for **T**ransition **M**inimized **D**ifferential **S**ignaling\) data channels; specifically on channel 0, they are used for horizontal and vertical sync \(where bit 0 is hsync and bit 1 is vsync,\) and on all 3 channels they are used to indicate whether a data period is either video data or a data island \(which I will cover later\.\) These values are taken straight from the DVI 1\.0 document, which formats the output data as big\-endian; I have converted it to little\-endian for easier understanding\.
 22 | 
 23 | 
 24 | | Control state | Output data |
 25 | | ------------- | ------------ |
 26 | | 0b00 | 0b1101010100 |
 27 | | 0b01 | 0b0010101011 |
 28 | | 0b10 | 0b0101010100 |
 29 | | 0b11 | 0b1010101011 |
 30 | 
 31 | ---
 32 | 
 33 | ### The Difference Between DVI and HDMI
 34 | The difference between DVI and HDMI is that HDMI adds the ability to send encoded data over the 3 data channels during the blanking periods, or data island periods\. This includes InfoFrames, which allow the signal source to transmit information about the signal to the sink, such as resolution, framerate, color depth, and audio information like number of channels and sample rate\. Each InfoFrame has a 3\-byte header, and a 31\-byte packet including a checksum byte \(which is calculated by adding all the bytes in the packet together and subtracting that from 256\.\) This means 30 bytes are valid data, so as an example, in order to transmit audio consistently, 6 samples \(24 bytes\) are transmitted during one InfoFrame\. The header is transmitted one bit at a time over channel 0, and the packet data is transmitted over channels 1 and 2\.
 35 | 
 36 | The header consists of a packet type byte, a version byte \(for the version of HDMI standard being used,\) and a length byte\. The length byte indicates how many bytes of the packet \(starting from packet byte 1, where byte 0 is the checksum\) are valid data\.
 37 | 
 38 | Hold it\! Before I move on to how data is encoded, I need to specify that before the data island period starts, a few things need to happen\. There are actually control periods between the active video data and the data island periods which tell the sink what data period is coming next, which are padded by some extra "control data\." This just happens to be horizontal and vertical sync \(bits 0 and 1 respectively\) on channel 0, with the useful data on channels 1 and 2\. First, the source transmits the extra data \(being normal DVI sync data\) for at least 4 pixel clocks\. Second, the source transmits the control word across channels 1 and 2 for 8 pixel clocks\. If the next data period is the data island, channels 1 and 2 transmit `0b01`, and if the next period is active video data, channel 1 transmits `0b01` and channel 2 transmits `0b00`\. \(2\-bit values refer to the control state lookup table above\.\) \(If this is not the case when I test it, then it refers to the guardband states\.\)
 39 | 
 40 | These preambles are supplemented by guard bands, which appear before the beginning of active video data, and at the beginning and end of the data island period, and are transmitted for 2 pixel clocks each\.
 41 | 
 42 | | Channel | Video guardband | Data island guardband |
 43 | | ------- | --------------- | --------------------- |
 44 | | 0 | 0b1011001100 | n/a |
 45 | | 1 | 0b0100110011 | 0b0100110011 |
 46 | | 2 | 0b1011001100 | 0b0100110011 |
 47 | 
 48 | ---
 49 | 
 50 | ### Auxiliary Data Encoding
 51 | HDMI uses an encoding method called TERC4 \(short for **T**MDS **E**rror **R**eduction **C**oding **4**\-bit\) to transmit data during the blanking periods, which are also known as data island periods\. TERC4 involves encoding 4 bits into a 10\-bit string to transmit via TMDS\. I don't know the exact algorithm used to do the encoding, but I do have a lookup table of those values\.
 52 | 
 53 | | 4\-bit value | 10\-bit encoded value |
 54 | | ----------- | ------------------- |
 55 | | 0b0000 | 0b1010011100 |
 56 | | 0b0001 | 0b1001100011 |
 57 | | 0b0010 | 0b1011100100 |
 58 | | 0b0011 | 0b1011100010 |
 59 | | 0b0100 | 0b0101110001 |
 60 | | 0b0101 | 0b0100011110 |
 61 | | 0b0110 | 0b0110001110 |
 62 | | 0b0111 | 0b0100111100 |
 63 | | 0b1000 | 0b1011001100 |
 64 | | 0b1001 | 0b0100111001 |
 65 | | 0b1010 | 0b0110011100 |
 66 | | 0b1011 | 0b1011000110 |
 67 | | 0b1100 | 0b1010001110 |
 68 | | 0b1101 | 0b1001110001 |
 69 | | 0b1110 | 0b0101100011 |
 70 | | 0b1111 | 0b1011000011 |
 71 | 
 72 | However, before the data can be TERC4 encoded, it has to be encoded with BCH error correction\. This splits each data island packet into 4 subpackets, each containing 56 bits of data and 8 bits of BCH ECC parity bits\. These are then put into BCH blocks; block 0 is mapped to bit 0 of TMDS channels 1 and 2, so that 64 bits of BCH block 0 are transferred over 32 pixels \(little\-endian, where bit 0 goes to channel 1 and bit 1 goes to channel 2 and so on\.\) BCH blocks 1\-3 are mapped in a similar manner, to bits 1\-3 of channels 1 and 2\.
 73 | The parity bits come at the end of each packet, after all the subpackets\- subpacket 0 bytes 0 through 6 are packet bytes 0 to 6, subpacket 1 is mapped to packet bytes 7 to 13, and so on, until packet byte 27\.
 74 | TL;DR data packets have 4 BCH blocks, comprising 7 data bytes and 8 parity bits each; the data bytes are sent first in the data packet, and the BCH parity bits are sent at the end\.
 75 | 
 76 | The HDMI 1\.3a specification document doesn't provide easily decipherable info on how to encode the BCH bytes, so I'll just refer to the repository that I mentioned at the beginning of this document\.
 77 | 
 78 | ---
 79 | 
 80 | ### Audio sample clock capture/regeneration theory
 81 | Since the audio is transmitted at the TMDS clock frequency, there is no immediate way to reconstruct the sample rate\. The HDMI source does have to send information every frame or two regarding the audio format \(which includes the sample rate,\) but that doesn't mean the sink can always regenerate the audio clock from that\. Instead, an InfoFrame may be sent that tells the sink the relationship between the TMDS clock and the audio sample clock in order to regenerate it\.
 82 | 
 83 | In this case, since the TMDS clock is 294MHz and the sample rate is 48KHz, the TMDS clock can simply be divided by 6125 to get the sample clock\. However, the audio reference clock for HDMI is 128 times the sample rate \(6\.144MHz\), so the TMDS clock has to be divided by 6125/128 \(\~47\.85\) instead\. Because the relationship between the audio reference clock and the TMDS clock is `128*sample_rate = tmds_clock*(N/CTS)`, the value of N/CTS can simply be 128/6125\.
 84 | 
 85 | However, because the sample clock is most likely already reconstructed from the information transmitted for the audio in general, this information isn't necessary to reconstruct the clock\.
 86 | 
 87 | ---
 88 | 
 89 | ### Signal Specs
 90 | The output resolution is 720x480p at a pixel clock of 29\.4MHz \(and a TMDS clock of 294MHz respectively\.\) The total area used by the frame is 912x539, and the vertical refresh rate \(framerate\) is \~59\.8086Hz\. It's not the Gameboy's vertical refresh of \~59\.73Hz, but it's only \~0\.1358% faster\. Additionally, the resolution and TMDS clock \(which is the RP2040 system clock\) allows an output clock of 4\.2MHz or 8\.4MHz to be perfectly synchronous with a Gameboy, Gameboy Advance or Gameboy Color\. Below is a table comparing the full specs of this signal to the standard 720x480p 60Hz\.
 91 | 
 92 | General info:
 93 | | Standard or no | Format code | Hactive | Vactive | Htotal | Hblank | Vtotal | Vblank | H freq \(KHz\) | V freq \(Hz\) | Pixel freq \(MHz\) |
 94 | | -------------- | ----------- | ------- | ------- | ------ | ------ | ------ | ------ | -------------- | ------------- | ------------------ |
 95 | | Standard | 2, 3 | 720 | 480 | 858 | 138 | 525 | 45 | 31\.4690 | 59\.9400 | 27\.000 |
 96 | | Custom | 2, 3 | 720 | 480 | 912 | 192 | 539 | 59 | 32\.2368 | 59\.8086 | 29\.400 |
 97 | 
 98 | The custom video resolution extends horizontal blanking by 54 pixels, and vertical blanking by 14 lines\.
 99 | 
100 | Specific sync info:
101 | | Standard or no | Hfront | Hsync | Hback | Hpol | Vfront | Vsync | Vback | Vpol |
102 | | -------------- | ------ | ----- | ----- | ---- | ------ | ----- | ----- | ---- |
103 | | Standard | 16 | 62 | 60 | N | 9 | 6 | 30 | N |
104 | | Custom | 32 | 64 | 96 | N | 13 | 8 | 38 | N |
105 | 
106 | If you're confused, here's what the terms mean:
107 | - Hfront/Vfront: Horizontal/vertical front porch \(pixels/lines after the active video data before the sync pulse\)
108 | - Hback/Vback: Horizontal/vertical back porch \(pixels/lines after the sync pulse before the active video data starts again\)
109 | - Hpol/Vpol: Horizontal/vertical sync pulse polarity
110 | 
111 | From the custom specifications, here are the \(rough\) data transmission timings for video outside of vertical blanking on channels 1 and 2:
112 | | Data period | Duration \(in pixel clocks\) |
113 | | ----------- | -------------------------- |
114 | | Data island padding data | 22 |
115 | | Data island preamble | 8 |
116 | | Data island guardband 1 | 2 |
117 | | Hsync pulse and data island | 64 |
118 | | Data island guardband 2 | 2 |
119 | | Video padding data | 84 |
120 | | Video preamble | 8 |
121 | | Video guardband | 2 |
122 | | Active video data | 720 |
123 | 
124 | And here's the version without any data island periods:
125 | | Data period | Duration \(in pixel clocks\) |
126 | | ----------- | -------------------------- |
127 | | Sync data | 182 |
128 | | Video preamble | 8 |
129 | | Video guardband | 2 |
130 | | Active video data | 720 |
131 | 
132 | ---
133 | 
134 | ### Summary of Concepts
135 | - DVI is based off of VGA, but is digital, supports higher resolutions, has more precise sync timing requirements, and transmits data 10 times faster than the pixel clock \(because it is a serial data stream\)
136 | - HDMI is based off of DVI, but requires a bit of extra data in order to work
137 | - Video data is encoded to 10 bit words using the TMDS algorithm which encodes data based on the current signal's DC offset \(or disparity\)
138 | - HDMI adds preambles, guardbands and data islands to DVI in order to transmit other data besides video during the blanking intervals \(which is encoded with TERC4\)
139 | - The preambles and guardbands are fixed data words
140 | - Data is transmitted in packets, which have a 3\-byte header and a 31\-byte packet body \(1 checksum byte followed by up to 30 bytes of data\), and 2 packets can be transmitted per hblank
141 | 
142 | ---
143 | 
144 | ### Hardware Components and Pin Mapping
145 | First, I will go over what hardware components I will be using in order to make this thing work\. Keep in mind, this is what I used just for the prototype:
146 | - Solderless breadboard\(s\)
147 | - Raspberry Pi Pico board
148 | - Adafruit DVI breakout board
149 | - TXS0108E 8\-bit level shifter breakout boards
150 | - Custom audio buffer board
151 | - 3\.5mm audio jack breakout board
152 | - A bunch of jumper/breadboard wires
153 | 
154 | And now the pin mapping\. For the sake of simplicity, I won't include the pin numbers on the Pico board\.
155 | | GPIO \(range\) | Function |
156 | | ------------ | -------- |
157 | | 0 \- 1 | Level shifter output enables |
158 | | 2 \- 9 | Multiplexed LCD data input |
159 | | 10 \- 12 | Pixel clock, hsync and vsync inputs |
160 | | 13 | Optional system clock output |
161 | | 14 \- 21 | HDMI output |
162 | | 26 \- 27 | ADC audio inputs |
163 | 
164 | ---
165 | 
166 | ### Initial DVI Test Program
167 | Because I want to start out small, I want to create a test program that simply displays a solid color and outputs sync signals in DVI mode\. That is, without any preambles or guard bands for the data island periods\. To make it simple, here's how the program will work:
168 | - Have one constant value/color to display that is sent in pairs with opposite disparity so the equivalent DC offset is zero
169 | - At the start of the frame, DMA that value for the horizontal resolution
170 | - Chain into sync DMA \(the whole sync period is stored as raw data\)
171 | - Chain back into active display
172 | - Repeat for active vertical lines
173 | - On the last sync period after the active display period, start DMAing from vsync buffer
174 | - On the last hsync pulse before vsync ends, start DMAing from standard hsync buffer
175 | 
176 | And here's how each DMA will work:
177 | - Active display: increment source \(frame or line buffer,\) constant destination \(PIO FIFO\), chain into sync DMA when done
178 | - Sync: increment source \(output sync data,\) constant destination \(PIO FIFO\), chain into active display DMA when done
179 | - Extra 1: on the last active video line, chain into a reconfigure DMA which switches the video position to just hsync and vsync values
180 | - Extra 2: every 2 lines, chain into a reconfigure DMA which resets the line buffer position \(the output is from a double line buffer\)
181 | 
182 | For the record, the DMA will transfer data to the PIOs when the FIFOs are either empty or not full\.
183 | 
184 | In order to reduce DMA channel usage, there will be 2 DMAs for configuration: one that reads from a control data buffer and writes to the main channels, and one that the first DMA chains into that changes the read and write addresses of the first DMA\.
185 | 
186 | Where literal channel 0 chains into "DMA 1" and that chains into "DMA 2" which normally chains into "DMA 1" again\. Each channel can be configured to wrap either its read or write address space\. DMA 1 should wrap in its write address space, and DMA 2 should wrap in its read address space \(since DMA 1 always reconfigures DMA 2, and DMA 2 configures the TMDS transmit channels as well as switch DMA 1's read position\.\)
187 | 
188 | Channel 0: finishes displaying active data, chains into channel 3 and DMA 1 \(assuming DMA 1 is already reading from channels 0\-2 configuration buffer\)
189 | - DMA 1: configures DMA 2 to chain back into DMA 1
190 | - DMA 2: configures channel 0
191 | - DMA 1: configures DMA 2 to write to channel 1
192 | - DMA 2: configures channel 1
193 | - DMA 1: configures DMA 2 to write to channel 2
194 | - DMA 2: configures channel 2
195 | - DMA 1: configures DMA 2 to read from DMA 1 configuration buffer and to not chain back into DMA 1
196 | - DMA 2: configures DMA 1 to read from channels 3\-5 configuration buffer
197 | 
198 | Channel 3: finishes sync, chains into channel 0 and DMA 1
199 | - DMA 1: configures DMA 2 to chain back into DMA 1
200 | - DMA 2: configures channel 3
201 | - DMA 1: configures DMA 2 to write to channel 4
202 | - DMA 2: configures channel 4
203 | - DMA 1: configures DMA 2 to write to channel 5
204 | - DMA 2: configures channel 5
205 | - DMA 1: configures DMA 2 to read from DMA 1 configuration buffer and to not chain back into DMA 1
206 | - DMA 2: configures DMA 1 to read from channels 0\-2 configuration buffer
207 | 
208 | Handling some of the DMA reconfigure action might need to be done by the TMDS encode core if there's enough time to do so\- PicoDVI outputs at a standard 800x525 resolution and only doubles its pixels instead of tripling them, so it shouldn't be unreasonable, considering that the CPU has at most 608 clock cycles to pack 16 tripled TMDS color channel values into 15 words in memory \(which is 38 clock cycles per tripled word\.\)
209 | 
210 | However, I could handle it with the other CPU core and interrupts, since there's already more than enough time to encode a frame's worth of audio using C code and 2 DMA channels will be handling input capture without any aid from the CPU\. In that case, a DMA interrupt will occur every line if enabled by the CPU, and 4 PWM units will be configured to fire off an interrupt once every frame; one synchronized with the beginning of the last visible line \(or the start of the sync period, I don't know\), one synchronized with the beginning of the line when the vertical sync pulse is active, one synchronized with the beginning of the line when the vertical sync pulse becomes inactive, and one synchronized with the beginning of the line or sync period of the very last line of the frame\. One of those interrupts could also tell the CPU to switch DMA channels 3\-5 to a buffer containing an AVI InfoFrame \(to refresh the video signal information for the HDMI sink\) or an audio InfoFrame \(to refresh the audio information for the sink\.\) The one PWM unit that optionally triggers an interrupt every line is enabled by the CPU when it finishes encoding a single audio block, and disabled when that audio block has been fully transmitted\. When active, channel 3\-5 behavior changes so that instead of transmitting a single line of normal sync data \(including the vertical blanking period\) over and over, it switches to a sync and aux data buffer constructed by the CPU during audio encoding\.
211 | 
212 | When the CPU finishes encoding a full audio block, it will enable interrupts from DMA channel 3\. When it receives that interrupt, it immediately halts DMA 1 and DMA 2 and switches them over to a set of separate command block buffers, which depending on the current line can be either a vsync inactive buffer or a vsync active buffer\. After the entire audio block has been sent, the CPU leaves the interrupt enabled if it has finished encoding it, or disables it and switches DMA 1 and DMA 2 back to the normal sync buffer if it hasn't finished encoding it\. Because there is a difference between active video and vblank periods in terms of sync output \(which influences how TMDS channel 0 is encoded,\) the CPU will explicitly encode a fixed number of audio blocks to send during vblank\. If 12 samples are sent per line, and 16 lines make up an audio block, then it will encode a total of 2 audio blocks to send during the vsync back porch since it lasts 38 lines, and 2 audio blocks can be sent in 32 lines\. Since 8 or 9 audio blocks are transmitted per frame depending on how many samples are left over, that means 6 or 7 audio blocks will be sent during active video, over 60 or 72 lines\.
213 | 
214 | For vsync behavior, the first PWM interrupt should be synced with the end of the last active display period\. The CPU will then configure DMA channels 0\-2 to read from a constant address instead of the normal display line buffer\. The second, third and fourth PWM interrupts will also be synced at the end of what would be an active display line, and the CPU will just switch DMA channels 0\-2 between different configurations as to what to display during what would be active video\.
215 | 
216 | Of course, since I need to use DMA, I need to learn how it works from a programming point of view, and not just a technical point of view\.
217 | 
218 | ---
219 | 
220 | ## Implementing Everything in Programming
221 | In order to implement really *any* of the concepts listed here in programming, the Raspberry Pi Pico SDK is very useful and helpful\. I've compiled a list of the hardware components I will be using, and the code/functions used to interface with them\.
222 | 
223 | ---
224 | 
225 | ### Part 1: DMA
226 | To start, let's have a look at the DMA functions\. Here are the ones that stuck out to me as the most important \(not really in any particular order\):
227 | - `dma_channel_claim (uint channel)`
228 | - `dma_channel_set_read_addr (uint channel, const volatile void *read_addr, bool trigger)` to set a channel's read address\.
229 | - `dma_channel_set_write_addr (uint channel, volatile void *write_addr, bool trigger)` to set a channel's write address\.
230 | - `dma_channel_set_trans_count (uint channel, uint32_t trans_count, bool trigger)` to set a channel's number of transfers to perform before stopping\.
231 | - `dma_channel_set_config (uint channel, const dma_channel_config *config, bool trigger)` to set a channel's configuration from a `dma_channel_config`\.
232 | - `dma_channel_start (uint channel)` to start a single channel\.
233 | - `dma_start_channel_mask (uint32_t chan_mask)` to start multiple channels at once\.
234 | - `dma_channel_abort (uint channel)` to stop a channel\.
235 | 
236 | Here's the channel config functions, which interact with the `dma_channel_config` data type:
237 | - `channel_config_set_read_increment (dma_channel_config *c, bool incr)` to set whether or not the read address is incremented\.
238 | - `channel_config_set_write_increment (dma_channel_config *c, bool incr)` to set whether or not the write address is incremented\.
239 | - `channel_config_set_dreq (dma_channel_config *c, uint dreq)` to set a DREQ source for a particular DMA channel\. Refer to the table below for sources\.
240 | - `channel_config_set_chain_to (dma_channel_config *c, uint chain_to)` to set a channel to chain to\. Doesn't appear to support chaining to multiple channels at once, but further research is needed\.
241 | - `channel_config_set_transfer_data_size (dma_channel_config *c, enum dma_channel_transfer_size size)` to set the transfer size\.
242 | - `channel_config_set_ring (dma_channel_config *c, bool write, uint size_bits)` to set the bottom number of bits that will change for the DMA address if enabled\.
243 | - `channel_config_set_irq_quiet (dma_channel_config *c, bool irq_quiet)` to set whether or not a completed transfer will trigger an IRQ\.
244 | - `channel_config_set_enable (dma_channel_config *c, bool enable)` to enable the DMA channel\.
245 | 
246 | Here is a table of all the DREQ sources:
247 | | DREQ | DREQ Channel | DREQ | DREQ Channel | DREQ | DREQ Channel | DREQ | DREQ Channel |
248 | | ---- | ------------ | ---- | ------------ | ---- | ------------ | ---- | ------------ |
249 | | 0 | DREQ_PIO0_TX0 | 10 | DREQ_PIO1_TX2 | 20 | DREQ_UART0_TX | 30 | DREQ_PWM_WRAP6 |
250 | | 1 | DREQ_PIO0_TX1 | 11 | DREQ_PIO1_TX3 | 21 | DREQ_UART0_RX | 31 | DREQ_PWM_WRAP7 |
251 | | 2 | DREQ_PIO0_TX2 | 12 | DREQ_PIO1_RX0 | 22 | DREQ_UART1_TX | 32 | DREQ_I2C0_TX |
252 | | 3 | DREQ_PIO0_TX3 | 13 | DREQ_PIO1_RX1 | 23 | DREQ_UART1_RX | 33 | DREQ_I2C0_RX |
253 | | 4 | DREQ_PIO0_RX0 | 14 | DREQ_PIO1_RX2 | 24 | DREQ_PWM_WRAP0 | 34 | DREQ_I2C1_TX |
254 | | 5 | DREQ_PIO0_RX1 | 15 | DREQ_PIO1_RX3 | 25 | DREQ_PWM_WRAP1 | 35 | DREQ_I2C1_RX |
255 | | 6 | DREQ_PIO0_RX2 | 16 | DREQ_SPI0_TX | 26 | DREQ_PWM_WRAP2 | 36 | DREQ_ADC |
256 | | 7 | DREQ_PIO0_RX3 | 17 | DREQ_SPI0_RX | 27 | DREQ_PWM_WRAP3 | 37 | DREQ_XIP_STREAM |
257 | | 8 | DREQ_PIO1_TX0 | 18 | DREQ_SPI1_TX | 28 | DREQ_PWM_WRAP4 | 38 | DREQ_XIP_SSITX |
258 | | 9 | DREQ_PIO1_TX1 | 19 | DREQ_SPI1_RX | 29 | DREQ_PWM_WRAP5 | 39 | DREQ_XIP_SSIRX |
259 | 
260 | The HDMI output will utilize TX FIFO DREQs in order to pace its transfers; DMA channels 0\-2 and 3\-5 get DREQ_PIO1_TX0\-2, channel 8 gets DREQ_PIO0_RX0, and channel 10 gets DREQ_ADC\.
261 | 
262 | To recap, here's the functions of every DMA channel:
263 | - Channels 0\-2: main TMDS active video transfer
264 | - Channels 3\-5: sync and aux data transfer
265 | - Channels 6\-7: channel 0\-5 reconfiguring
266 | - Channel 8: input LCD data transfer to framebuffer \(double buffer of 240x160 words total\)
267 | - Channel 9: channel 8 reconfiguring
268 | - Channel 10: ADC sample transferring \(double buffer of 192 samples each, 96 samples per channel\)
269 | - Channel 11: channel 10 reconfiguring \(buffers may be contiguous, but the CPU should be interrupted every time one buffer is filled, which equals one audio block's worth of samples\(?\)\)
270 | 
271 | ---
272 | 
273 | ### Part 2: System Clock and Core Voltage
274 | Since the Pico needs to run at 294MHz in order to output an HDMI signal, let's look at how to configure the system clock\. Here are the functions that I'm going to use:
275 | - `check_sys_clock_khz (uint32_t freq_khz, uint *vco_freq_out, uint *post_div1_out, uint *post_div2_out)` checks to see if a system clock frequency is valid, along with pointers to variables which will store the VCO frequency and dividers if it is valid\. These values can then be used to configure the system clock\.
276 | - `set_sys_clock_pll (uint32_t vco_freq, uint post_div1, uint post_div2)` to set the system's PLL directly\. I'll be using this in conjunction with `check_sys_clock_khz`\.
277 | 
278 | In addition to doing that, the core voltage needs to be changed in order to successfully overclock the Pico to 294MHz\. [This video](https://www.youtube.com/watch?v=G2BuoFNLoDM&t=194s) gets into that around 3 minutes and 10 seconds in, and it seems that the minimum voltage for the speed I want to run it at is 1\.15 volts\. This requires the addition of `#include "hardware/vreg.h"`, and `vreg_set_voltage(VREG_VOLTAGE_1_15)` to set the voltage to the desired amount\.
279 | 
280 | ---
281 | 
282 | ### Part 3: GPIO and PIO Configuration
283 | The basic concept of GPIO is to set each pin's mode to either input or output\. If it's an input, optionally configure the internal pull\-up or pull\-down resistors and then read from a register to get the pin states; if it's an output, write to a register to set the pin outputs\. However, there's a separate step required on the CPU side to connect the PIO to GPIO\- just set the pin function of the GPIOs that will be used by the PIO either to `GPIO_FUNC_PIO0` or `GPIO_FUNC_PIO1`\. Here are the relevant functions related to GPIO:
284 | - `gpio_set_function (uint gpio, enum gpio_function fn)` to set the function of a specific GPIO\.
285 | - `gpio_set_pulls (uint gpio, bool up, bool down)` to set the pulls of a specific GPIO\.
286 | - `gpio_set_input_enabled (uint gpio, bool enabled)` to enable a single GPIO as input\.
287 | - `gpio_set_slew_rate (uint gpio, enum gpio_slew_rate slew)` to set the slew rate of a GPIO\. This effectively makes the GPIO either low speed or high speed\. The two settings are `GPIO_SLEW_RATE_SLOW` and `GPIO_SLEW_RATE_FAST`\.
288 | - `gpio_set_drive_strength (uint gpio, enum gpio_drive_strength drive)` to set how much current at its maximum the GPIO will provide\. The strengths are 2mA, 4mA, 8mA and 12mA, specified by `GPIO_DRIVE_STRENGTH_nMA` where n is the strength\.
289 | - `gpio_set_dir_out_masked (uint32_t mask)` to set whichever GPIOs corresponding to set bits as outputs\.
290 | - `gpio_set_dir_in_masked (uint32_t mask)` to set whichever GPIOs corresponding to set bits as inputs\.
291 | 
292 | PIOs are very versatile, because 'in', 'out', 'set' and 'side\-set' pins can be mapped to different areas with different numbers of addressable pins associated with them\. The PIO state machine address/wrap space can also be configured\- in PicoDVI, the address space is set to only 1 bit \(2 instructions\) so that single\-ended TMDS data can be translated into a differential output using the program counter as a LUT address for a side\-set that creates the output\!
293 | 
294 | ---
295 | 
296 | ### Part 4: Interpolator
297 | Apparently the RP2040 has this useful piece of hardware called an interpolator, which comprises 2 lanes which each have an accumulator \(which can add numbers\), a right\-shift unit, a mask unit, and sign extension \(which fills the top bits with 0 or 1 based on the current MSb\)\. Since creating the lookup table address for a pixel requires a shift and mask, the interpolator is perfect for it\. The only action required by the processor is to left shift the output pixel once and OR it with the current disparity value\. In addition, the interpolator could be used
298 | 
299 | ---
300 | 
301 | ### Part 5: Interrupts and PWM
302 | TODO
303 | 
304 | ---
305 | 
306 | ### Part 6: ADC
307 | TODO
308 | 
309 | ---
310 | 
311 | ### Compiling the code
312 | Because timing is critical, running all the code from RAM is extremely necessary\. In order to do this, when compiling the code, just use `cmake -DPICO_COPY_TO_RAM=1` to let the compiler know that the program needs to be copied to RAM\. TODO
313 | 
314 | ---
315 | 
316 | ### How audio is encoded
317 | HDMI can transmit audio formatted in the AES3 standard, which basically comprises raw PCM and some data describing the audio stream so that the sink can decode it and play it back properly\. My current focus is making sure sync works, so this section is TODO
318 | 
319 | ---
320 | 
321 | ### HDMI audio test program
322 | Note: Because the InfoFrame packets are wacky, a null byte \(or header checksum?\) has to be sent during the header, and a null byte has to be sent at the end of a packet to pad it to 32 bytes\. In the case of audio, since only 6 samples or 24 bytes are transmitted in a packet in order to consistently send an audio block in 16 lines, it has to be padded with 7 null bytes, and the length in the header has to be set to 24\. I want to create the first DVI test program first before I think more about this, so it is currently TODO
323 | 
324 | ---
325 | 
326 | ### Gameboy model select
327 | The first prototype/working device won't have any model detection and will have separate firmwares for different models, but the final version might have model detection and switching\. In order to make the device more versatile, it needs to be able to get video output from every Gameboy model while using the same firmware\. There is one more GPIO pin, GP28, that can be used to determine which model of Gameboy is connected \(or the intended model\) when the device powers on\. If the pin is low, then it is either a GBC or a GBA \(the framebuffer will be the same size for all models and framebuffer position will be determined in software, but sampling will be different\), and if the pin is high, a DMG \(monochrome Gameboy\) is connected\. Without using 2 input buffers, the 6 other inputs can be used to monitor a set of buttons that can switch between user\-configured color palettes\.
328 | 
329 | If the Gameboy in GBC/GBA mode is detected incorrectly, the pin can be brought low again in order to run the detection again\.
330 | 


--------------------------------------------------------------------------------
/scripts/tmds_util.c:
--------------------------------------------------------------------------------
  1 | /*
  2 | 	tmds_util.c
  3 | 
  4 | 	This program generates the TMDS output data/lookup tables for the Raspberry Pi Pico/RP2040.
  5 | 	And various other utilities.
  6 | 
  7 | 	TO DO:
  8 | 	-Add TMDS audio LUT generation (if necessary)
  9 | 
 10 | 	The HDMI InfoFrame buffers will still have the standard sync data tacked onto them,
 11 | 	but will just transmit the relevant information during the hsync pulse so channel 0
 12 | 	is ORed with whatever current bit of the header is shifted left 2 bits, and channels 1
 13 | 	and 2 will carry the little-endian TERC4 encoded data.
 14 | 	So before the unpacked buffer is freed after it is copied to the packed buffer, inject
 15 | 	the data into it and pack it into a different buffer. (Though encoding the sync and header
 16 | 	bits in TERC4 will have to be done manually.)
 17 | */
 18 | 
 19 | #include <stdio.h>
 20 | #include <stdlib.h>
 21 | #include <stdbool.h>
 22 | #include <string.h>
 23 | #include <stdint.h>
 24 | #include <math.h>
 25 | #include <unistd.h>
 26 | #include "tmds_util.h"
 27 | 
 28 | const uint16_t sync_ctl_states[] = 
 29 | {
 30 | 	0b0000001101010100,
 31 | 	0b0000000010101011,
 32 | 	0b0000000101010100,
 33 | 	0b0000001010101011
 34 | };
 35 | 
 36 | const uint16_t guardband_states[] = 
 37 | {
 38 | 	0b0000001011001100,
 39 | 	0b0000000100110011
 40 | };
 41 | 
 42 | const uint16_t terc4_table[] = 
 43 | {
 44 | 	0b0000001010011100,
 45 | 	0b0000001001100011,
 46 | 	0b0000001011100100,
 47 | 	0b0000001011100010,
 48 | 	0b0000000101110001,
 49 | 	0b0000000100011110,
 50 | 	0b0000000110001110,
 51 | 	0b0000000100111100,
 52 | 	0b0000001011001100,
 53 | 	0b0000000100111001,
 54 | 	0b0000000110011100,
 55 | 	0b0000001011000110,
 56 | 	0b0000001010001110,
 57 | 	0b0000001001110001,
 58 | 	0b0000000101100011,
 59 | 	0b0000001011000011
 60 | };
 61 | 
 62 | // OR these with the InfoFrame header bits.
 63 | // 0 = during vsync, 1 = during active video (in the hblank interval, during the hsync pulse)
 64 | const uint8_t sync_masks[] = 
 65 | {
 66 | 	0b00001000,
 67 | 	0b00001010
 68 | };
 69 | 
 70 | // Creates the TMDS lookup table, where each entry has 3 separate pixels and an output disparity value (stored in 2 separate words.)
 71 | int main()
 72 | {
 73 |     uint32_t *tmds_lut = (uint32_t *)malloc(0x400*sizeof(uint32_t));
 74 |     struct tmds_pixel_t *tmds_pixel = (struct tmds_pixel_t *)malloc(sizeof(struct tmds_pixel_t));
 75 |     uint8_t color = 0, color_8b = 0;
 76 |     int dispy = 0;
 77 |     for(color=0; color<32; color++)
 78 |     {
 79 |     	for(dispy=-8; dispy<8; dispy++)
 80 |     	{
 81 |     		color_8b = depth_convert(color);
 82 |     		tmds_pixel->color_data_5b = color;
 83 |     		tmds_pixel->color_data = color_8b;
 84 |     		tmds_pixel->tmds_data = 0;
 85 |     		tmds_pixel->disparity = dispy;
 86 |     		tmds_pixel_repeat(tmds_lut, tmds_pixel);
 87 |     	}
 88 |     }
 89 | 
 90 |     FILE *pico_tmds_lut = fopen("tmds_lut.bin", "wb");
 91 |     fwrite(tmds_lut, 4, 1024, pico_tmds_lut);
 92 |     fclose(pico_tmds_lut);
 93 |     free(tmds_pixel);
 94 |     free(tmds_lut);
 95 |     // These functions create the sync buffers with the null packets and with no packets.
 96 |     // They do everything automatically, including packing the data and writing it to files.
 97 |     create_sync_buffers();
 98 |     create_sync_buffers_nodat();
 99 |     // Now create the AVI (video) InfoFrame.
100 |     // Creates both hsync and during vsync variants.
101 | 
102 |     create_avi_infoframe(); // Also writes them to files and frees the structs.
103 |     // Create a solid line that can be used to get a solid color on the screen.
104 |     // Black, white, red, green, blue, magenta, cyan, or yellow can be made with different combinations.
105 |     // The create_solid_line() function also writes it to a file.
106 |     struct tmds_pixel_t *solid_pixel = (struct tmds_pixel_t *)malloc(sizeof(struct tmds_pixel_t));
107 |     solid_pixel->color_data_5b = 0x00;
108 |     char *pixel_name = (char *)malloc(32);
109 |     sprintf(pixel_name, "pixel_0x00.bin");
110 |     create_solid_line(pixel_name, solid_pixel);
111 |     solid_pixel->color_data_5b = 0x1f;
112 |     sprintf(pixel_name, "pixel_0xff.bin");
113 |     create_solid_line(pixel_name, solid_pixel);
114 |     free(pixel_name);
115 |     free(solid_pixel);
116 |     
117 |     return 0;
118 | }
119 | 
120 | // Frees the allocated buffers before the program exits to prevent bad stuff from happening.
121 | void free_sync_buffers(struct sync_buffer_t *sync_buffer)
122 | {
123 | 	free(sync_buffer->hblank_ch0);
124 | 	free(sync_buffer->hblank_ch1);
125 | 	free(sync_buffer->hblank_ch2);
126 | 
127 | 	free(sync_buffer->vblank_en_ch0);
128 | 	free(sync_buffer->vblank_en_ch1);
129 | 	free(sync_buffer->vblank_en_ch2);
130 | 
131 | 	free(sync_buffer->vblank_syn_ch0);
132 | 	free(sync_buffer->vblank_syn_ch1);
133 | 	free(sync_buffer->vblank_syn_ch2);
134 | 
135 | 	free(sync_buffer->vblank_ex_ch0);
136 | 	free(sync_buffer->vblank_ex_ch1);
137 | 	free(sync_buffer->vblank_ex_ch2);
138 | 
139 | 	free(sync_buffer);
140 | }
141 | 
142 | void free_sync_buffers_32(struct sync_buffer_32_t *sync_buffer)
143 | {
144 | 	free(sync_buffer->hblank_ch0);
145 | 	free(sync_buffer->hblank_ch1);
146 | 	free(sync_buffer->hblank_ch2);
147 | 
148 | 	free(sync_buffer->vblank_en_ch0);
149 | 	free(sync_buffer->vblank_en_ch1);
150 | 	free(sync_buffer->vblank_en_ch2);
151 | 
152 | 	free(sync_buffer->vblank_syn_ch0);
153 | 	free(sync_buffer->vblank_syn_ch1);
154 | 	free(sync_buffer->vblank_syn_ch2);
155 | 
156 | 	free(sync_buffer->vblank_ex_ch0);
157 | 	free(sync_buffer->vblank_ex_ch1);
158 | 	free(sync_buffer->vblank_ex_ch2);
159 | 
160 | 	free(sync_buffer);
161 | }
162 | 
163 | void allocate_sync_buffer(uint16_t **buffer)
164 | {
165 | 	*buffer = (uint16_t *)malloc((H_TOTAL-H_ACTIVE)*sizeof(uint16_t));
166 | 
167 | 	return;
168 | }
169 | 
170 | void allocate_sync_buffer_32(uint32_t **buffer)
171 | {
172 | 	*buffer = (uint32_t *)malloc((((H_TOTAL-H_ACTIVE)*10)/32)*sizeof(uint32_t)); // Whoops! Initially forgot to put the multiplication factor there.
173 | 
174 | 	return;
175 | }
176 | 
177 | // Video format (hsync before active video):
178 | // Line 494: enter vsync buffer
179 | // Lines 495-501: during vsync buffer
180 | // Line 502: exit vsync buffer
181 | // During vsync buffer lasts 1 line less than the number of lines per vsync pulse
182 | // because the previous line counts as a line.
183 | // Interrupt/timing scheme (starts at line 1, not line 0):
184 | // Line 481 hblank start interrupt: reconfigure active video transmit as sync transmit
185 | // Line 493 active start interrupt: prepare enter vsync buffer
186 | // Line 494 active start interrupt: prepare during vsync buffer
187 | // Line 501 active start interrupt: prepare exit vsync buffer
188 | // Line 1 hblank start interrupt: reconfigure sync transmit as active video transmit again
189 | 
190 | // Creates 2 static data buffers: one for hsync, one for vsync.
191 | // Vsync buffer does not include a video data period preamble or guard band
192 | // They have null data during the data island periods.
193 | // Since there will be only one output resolution, this uses global defines.
194 | void create_sync_buffers()
195 | {
196 | 	struct sync_buffer_t *sync_buffer = (struct sync_buffer_t *)malloc(sizeof(struct sync_buffer_t));
197 | 
198 | 	allocate_sync_buffer(&(sync_buffer->hblank_ch0));
199 | 	allocate_sync_buffer(&(sync_buffer->hblank_ch1));
200 | 	allocate_sync_buffer(&(sync_buffer->hblank_ch2));
201 | 
202 | 	allocate_sync_buffer(&(sync_buffer->vblank_en_ch0));
203 | 	allocate_sync_buffer(&(sync_buffer->vblank_en_ch1));
204 | 	allocate_sync_buffer(&(sync_buffer->vblank_en_ch2));
205 | 
206 | 	allocate_sync_buffer(&(sync_buffer->vblank_syn_ch0));
207 | 	allocate_sync_buffer(&(sync_buffer->vblank_syn_ch1));
208 | 	allocate_sync_buffer(&(sync_buffer->vblank_syn_ch2));
209 | 
210 | 	allocate_sync_buffer(&(sync_buffer->vblank_ex_ch0));
211 | 	allocate_sync_buffer(&(sync_buffer->vblank_ex_ch1));
212 | 	allocate_sync_buffer(&(sync_buffer->vblank_ex_ch2));
213 | 	// Format, starting in hblank:
214 | 	// Normal sync data for at least 4 pixel clocks
215 | 	// Preamble for 8 pixel clocks (TMDS channel 1, channel 2): (data island here)
216 | 	// Data island: 0b01, 0b01; Video period: 0b01, 0b00
217 | 	// Guard band for 2 pixel clocks (channel 0, 1, 2): (data island here)
218 | 	// Video: 0b1011001100, 0b0100110011, 0b1011001100; Data: n/a, 0b0100110011, 0b0100110011
219 | 	// Data island period: 64 clocks total, 32 per InfoFrame/packet
220 | 	// Guard band for 2 pixel clocks (data island exit)
221 | 	// Normal sync data for at least 4 pixel clocks
222 | 	// Preamble for 8 pixel clocks (video period here)
223 | 	// Guard band for 2 pixel clocks (video period here)
224 | 	// Active video data (not included in sync buffers)
225 | 	int data_pad = H_FRONT - 10;
226 | 	int video_pad = H_BACK - 12; // Data island has 2 guardbands, so we subtract 2 guardbands total (2nd data island one and video data one)
227 | 	int j = 0;
228 | 	for(int i=0; i<data_pad; i++)
229 | 	{
230 | 		sync_buffer->hblank_ch0[j] = sync_ctl_states[3]; // Since signals are active low, during this period they're both high
231 | 		sync_buffer->hblank_ch1[j] = sync_ctl_states[0]; // Channels 1 and 2 are to be kept low
232 | 		sync_buffer->hblank_ch2[j] = sync_ctl_states[0];
233 | 
234 | 		sync_buffer->vblank_en_ch0[j] = sync_ctl_states[3]; // The falling edge of vsync comes later
235 | 		sync_buffer->vblank_en_ch1[j] = sync_ctl_states[0]; // Program throws a segmentation fault here because it accesses the wrong address, wtf???
236 | 		sync_buffer->vblank_en_ch2[j] = sync_ctl_states[0];
237 | 
238 | 		sync_buffer->vblank_syn_ch0[j] = sync_ctl_states[1]; // Bit 1 is vsync, so it is to be kept low during this period
239 | 		sync_buffer->vblank_syn_ch1[j] = sync_ctl_states[0];
240 | 		sync_buffer->vblank_syn_ch2[j] = sync_ctl_states[0];
241 | 
242 | 		sync_buffer->vblank_ex_ch0[j] = sync_ctl_states[1]; // The rising edge of vsync comes later
243 | 		sync_buffer->vblank_ex_ch1[j] = sync_ctl_states[0];
244 | 		sync_buffer->vblank_ex_ch2[j++] = sync_ctl_states[0];
245 | 	}
246 | 	for(int i=0; i<8; i++)
247 | 	{
248 | 		sync_buffer->hblank_ch0[j] = sync_ctl_states[3];
249 | 		sync_buffer->hblank_ch1[j] = sync_ctl_states[1];
250 | 		sync_buffer->hblank_ch2[j] = sync_ctl_states[1];
251 | 
252 | 		sync_buffer->vblank_en_ch0[j] = sync_ctl_states[3]; // Not yet
253 | 		sync_buffer->vblank_en_ch1[j] = sync_ctl_states[1];
254 | 		sync_buffer->vblank_en_ch2[j] = sync_ctl_states[1];
255 | 
256 | 		sync_buffer->vblank_syn_ch0[j] = sync_ctl_states[1];
257 | 		sync_buffer->vblank_syn_ch1[j] = sync_ctl_states[1];
258 | 		sync_buffer->vblank_syn_ch2[j] = sync_ctl_states[1];
259 | 
260 | 		sync_buffer->vblank_ex_ch0[j] = sync_ctl_states[1]; // Not yet
261 | 		sync_buffer->vblank_ex_ch1[j] = sync_ctl_states[1];
262 | 		sync_buffer->vblank_ex_ch2[j++] = sync_ctl_states[1];
263 | 	}
264 | 	for(int i=0; i<2; i++)
265 | 	{
266 | 		// Channel 0: transmits hsync and vsync terc4 encoded with top 2 bits set
267 | 		// Channels 1 and 2: transmit guardband
268 | 		sync_buffer->hblank_ch0[j] = terc4_table[15];
269 | 		sync_buffer->hblank_ch1[j] = guardband_states[1]; //0b0100110011
270 | 		sync_buffer->hblank_ch2[j] = guardband_states[1];
271 | 
272 | 		sync_buffer->vblank_en_ch0[j] = terc4_table[15];
273 | 		sync_buffer->vblank_en_ch1[j] = guardband_states[1];
274 | 		sync_buffer->vblank_en_ch2[j] = guardband_states[1];
275 | 
276 | 		sync_buffer->vblank_syn_ch0[j] = terc4_table[13]; // vsync is still active, so transmit 0b1101
277 | 		sync_buffer->vblank_syn_ch1[j] = guardband_states[1];
278 | 		sync_buffer->vblank_syn_ch2[j] = guardband_states[1];
279 | 
280 | 		sync_buffer->vblank_ex_ch0[j] = terc4_table[13];
281 | 		sync_buffer->vblank_ex_ch1[j] = guardband_states[1];
282 | 		sync_buffer->vblank_ex_ch2[j++] = guardband_states[1];
283 | 	}
284 | 	for(int i=0; i<H_PULSE; i++)
285 | 	{
286 | 		// Here is where the vsync pulse is allowed to transition (can't remember if it's before or after the hsync pulse though)
287 | 		// Channel 0 transmits sync signals terc4 encoded either with bit 3 set or reset, bit 2 is reset for null header
288 | 		// Bit 3 reset: 2 and 0, bit 3 set: 10 and 8 (going with bit set first)
289 | 		sync_buffer->hblank_ch0[j] = terc4_table[10]; //0b1010
290 | 		sync_buffer->hblank_ch1[j] = terc4_table[8]; // Transmit null packets
291 | 		sync_buffer->hblank_ch2[j] = terc4_table[8];
292 | 
293 | 		sync_buffer->vblank_en_ch0[j] = terc4_table[8]; //0b1000
294 | 		sync_buffer->vblank_en_ch1[j] = terc4_table[8];
295 | 		sync_buffer->vblank_en_ch2[j] = terc4_table[8];
296 | 
297 | 		sync_buffer->vblank_syn_ch0[j] = terc4_table[8]; //0b1000
298 | 		sync_buffer->vblank_syn_ch1[j] = terc4_table[8];
299 | 		sync_buffer->vblank_syn_ch2[j] = terc4_table[8];
300 | 
301 | 		sync_buffer->vblank_ex_ch0[j] = terc4_table[10]; //0b1010
302 | 		sync_buffer->vblank_ex_ch1[j] = terc4_table[8];
303 | 		sync_buffer->vblank_ex_ch2[j++] = terc4_table[8];
304 | 	}
305 | 	for(int i=0; i<2; i++)
306 | 	{
307 | 		sync_buffer->hblank_ch0[j] = terc4_table[15];
308 | 		sync_buffer->hblank_ch1[j] = guardband_states[0]; //0b0100110011
309 | 		sync_buffer->hblank_ch2[j] = guardband_states[0];
310 | 
311 | 		sync_buffer->vblank_en_ch0[j] = terc4_table[13]; //vsync is now active, so transmit 0b1101
312 | 		sync_buffer->vblank_en_ch1[j] = guardband_states[0];
313 | 		sync_buffer->vblank_en_ch2[j] = guardband_states[0];
314 | 
315 | 		sync_buffer->vblank_syn_ch0[j] = terc4_table[13]; //vsync is still active
316 | 		sync_buffer->vblank_syn_ch1[j] = guardband_states[0];
317 | 		sync_buffer->vblank_syn_ch2[j] = guardband_states[0];
318 | 
319 | 		sync_buffer->vblank_ex_ch0[j] = terc4_table[15]; //vsync is no longer active
320 | 		sync_buffer->vblank_ex_ch1[j] = guardband_states[0];
321 | 		sync_buffer->vblank_ex_ch2[j++] = guardband_states[0];
322 | 	}
323 | 	for(int i=0; i<video_pad; i++)
324 | 	{
325 | 		sync_buffer->hblank_ch0[j] = sync_ctl_states[3]; // Since signals are active low, during this period they're both high
326 | 		sync_buffer->hblank_ch1[j] = sync_ctl_states[0]; // Channels 1 and 2 are to be kept low
327 | 		sync_buffer->hblank_ch2[j] = sync_ctl_states[0];
328 | 
329 | 		sync_buffer->vblank_en_ch0[j] = sync_ctl_states[1]; // vsync is low
330 | 		sync_buffer->vblank_en_ch1[j] = sync_ctl_states[0];
331 | 		sync_buffer->vblank_en_ch2[j] = sync_ctl_states[0];
332 | 
333 | 		sync_buffer->vblank_syn_ch0[j] = sync_ctl_states[1]; // vsync is low
334 | 		sync_buffer->vblank_syn_ch1[j] = sync_ctl_states[0];
335 | 		sync_buffer->vblank_syn_ch2[j] = sync_ctl_states[0];
336 | 
337 | 		sync_buffer->vblank_ex_ch0[j] = sync_ctl_states[3]; // vsync is high
338 | 		sync_buffer->vblank_ex_ch1[j] = sync_ctl_states[0];
339 | 		sync_buffer->vblank_ex_ch2[j++] = sync_ctl_states[0];
340 | 	}
341 | 	for(int i=0; i<8; i++)
342 | 	{
343 | 		sync_buffer->hblank_ch0[j] = sync_ctl_states[3];
344 | 		sync_buffer->hblank_ch1[j] = sync_ctl_states[1];
345 | 		sync_buffer->hblank_ch2[j] = sync_ctl_states[0];
346 | 
347 | 		sync_buffer->vblank_en_ch0[j] = sync_ctl_states[1];
348 | 		sync_buffer->vblank_en_ch1[j] = sync_ctl_states[1];
349 | 		sync_buffer->vblank_en_ch2[j] = sync_ctl_states[0];
350 | 
351 | 		sync_buffer->vblank_syn_ch0[j] = sync_ctl_states[1];
352 | 		sync_buffer->vblank_syn_ch1[j] = sync_ctl_states[1];
353 | 		sync_buffer->vblank_syn_ch2[j] = sync_ctl_states[0];
354 | 
355 | 		sync_buffer->vblank_ex_ch0[j] = sync_ctl_states[3];
356 | 		sync_buffer->vblank_ex_ch1[j] = sync_ctl_states[1];
357 | 		sync_buffer->vblank_ex_ch2[j++] = sync_ctl_states[0];
358 | 	}
359 | 	for(int i=0; i<2; i++)
360 | 	{
361 | 		sync_buffer->hblank_ch0[j] = guardband_states[0]; //0b1011001100
362 | 		sync_buffer->hblank_ch1[j] = guardband_states[1]; 
363 | 		sync_buffer->hblank_ch2[j] = guardband_states[0];
364 | 
365 | 		sync_buffer->vblank_en_ch0[j] = guardband_states[0];
366 | 		sync_buffer->vblank_en_ch1[j] = guardband_states[1];
367 | 		sync_buffer->vblank_en_ch2[j] = guardband_states[0];
368 | 
369 | 		sync_buffer->vblank_syn_ch0[j] = guardband_states[0];
370 | 		sync_buffer->vblank_syn_ch1[j] = guardband_states[1];
371 | 		sync_buffer->vblank_syn_ch2[j] = guardband_states[0];
372 | 
373 | 		sync_buffer->vblank_ex_ch0[j] = guardband_states[0];
374 | 		sync_buffer->vblank_ex_ch1[j] = guardband_states[1];
375 | 		sync_buffer->vblank_ex_ch2[j++] = guardband_states[0];
376 | 	}
377 | 	char buffer_name[] = "nm";
378 | 	create_sync_files(buffer_name, sync_buffer);
379 | 
380 | 	return;
381 | }
382 | 
383 | // Creates sync buffers without the data island period.
384 | // Basically, just 190 pixel clocks' worth of normal sync data before the video guardband.
385 | // No data island preamble, no data island guardbands, just the video preamble and guard band.
386 | void create_sync_buffers_nodat()
387 | {
388 | 	struct sync_buffer_t *sync_buffer = (struct sync_buffer_t *)malloc(sizeof(struct sync_buffer_t));
389 | 
390 | 	allocate_sync_buffer(&(sync_buffer->hblank_ch0));
391 | 	allocate_sync_buffer(&(sync_buffer->hblank_ch1));
392 | 	allocate_sync_buffer(&(sync_buffer->hblank_ch2));
393 | 
394 | 	allocate_sync_buffer(&(sync_buffer->vblank_en_ch0));
395 | 	allocate_sync_buffer(&(sync_buffer->vblank_en_ch1));
396 | 	allocate_sync_buffer(&(sync_buffer->vblank_en_ch2));
397 | 
398 | 	allocate_sync_buffer(&(sync_buffer->vblank_syn_ch0));
399 | 	allocate_sync_buffer(&(sync_buffer->vblank_syn_ch1));
400 | 	allocate_sync_buffer(&(sync_buffer->vblank_syn_ch2));
401 | 
402 | 	allocate_sync_buffer(&(sync_buffer->vblank_ex_ch0));
403 | 	allocate_sync_buffer(&(sync_buffer->vblank_ex_ch1));
404 | 	allocate_sync_buffer(&(sync_buffer->vblank_ex_ch2));
405 | 
406 | 	int video_pad = (H_TOTAL - H_ACTIVE) - 10; // Subtract the 10 pixels used for video preamble and guardband
407 | 	int sync_pad = ((H_TOTAL - H_ACTIVE) - H_BACK) - H_PULSE;
408 | 	int j = 0;
409 | 	for(int i=0; i<sync_pad; i++)
410 | 	{
411 | 		sync_buffer->hblank_ch0[j] = sync_ctl_states[3];
412 | 		sync_buffer->hblank_ch1[j] = sync_ctl_states[0];
413 | 		sync_buffer->hblank_ch2[j] = sync_ctl_states[0];
414 | 
415 | 		sync_buffer->vblank_en_ch0[j] = sync_ctl_states[3];
416 | 		sync_buffer->vblank_en_ch1[j] = sync_ctl_states[0];
417 | 		sync_buffer->vblank_en_ch2[j] = sync_ctl_states[0];
418 | 
419 | 		sync_buffer->vblank_syn_ch0[j] = sync_ctl_states[1];
420 | 		sync_buffer->vblank_syn_ch1[j] = sync_ctl_states[0];
421 | 		sync_buffer->vblank_syn_ch2[j] = sync_ctl_states[0];
422 | 
423 | 		sync_buffer->vblank_ex_ch0[j] = sync_ctl_states[1];
424 | 		sync_buffer->vblank_ex_ch1[j] = sync_ctl_states[0];
425 | 		sync_buffer->vblank_ex_ch2[j++] = sync_ctl_states[0];
426 | 	}
427 | 	for(int i=0; i<H_PULSE; i++)
428 | 	{
429 | 		sync_buffer->hblank_ch0[j] = sync_ctl_states[2]; 
430 | 		sync_buffer->hblank_ch1[j] = sync_ctl_states[0];
431 | 		sync_buffer->hblank_ch2[j] = sync_ctl_states[0];
432 | 
433 | 		sync_buffer->vblank_en_ch0[j] = sync_ctl_states[0];
434 | 		sync_buffer->vblank_en_ch1[j] = sync_ctl_states[0];
435 | 		sync_buffer->vblank_en_ch2[j] = sync_ctl_states[0];
436 | 
437 | 		sync_buffer->vblank_syn_ch0[j] = sync_ctl_states[0]; 
438 | 		sync_buffer->vblank_syn_ch1[j] = sync_ctl_states[0];
439 | 		sync_buffer->vblank_syn_ch2[j] = sync_ctl_states[0];
440 | 
441 | 		sync_buffer->vblank_ex_ch0[j] = sync_ctl_states[2];
442 | 		sync_buffer->vblank_ex_ch1[j] = sync_ctl_states[0];
443 | 		sync_buffer->vblank_ex_ch2[j++] = sync_ctl_states[0];
444 | 	}
445 | 	// Only these last two remain the same
446 | 	for(int i=0; i<8; i++)
447 | 	{
448 | 		sync_buffer->hblank_ch0[j] = sync_ctl_states[3];
449 | 		sync_buffer->hblank_ch1[j] = sync_ctl_states[1];
450 | 		sync_buffer->hblank_ch2[j] = sync_ctl_states[0];
451 | 
452 | 		sync_buffer->vblank_en_ch0[j] = sync_ctl_states[1];
453 | 		sync_buffer->vblank_en_ch1[j] = sync_ctl_states[1];
454 | 		sync_buffer->vblank_en_ch2[j] = sync_ctl_states[0];
455 | 
456 | 		sync_buffer->vblank_syn_ch0[j] = sync_ctl_states[1];
457 | 		sync_buffer->vblank_syn_ch1[j] = sync_ctl_states[1];
458 | 		sync_buffer->vblank_syn_ch2[j] = sync_ctl_states[0];
459 | 
460 | 		sync_buffer->vblank_ex_ch0[j] = sync_ctl_states[3];
461 | 		sync_buffer->vblank_ex_ch1[j] = sync_ctl_states[1];
462 | 		sync_buffer->vblank_ex_ch2[j++] = sync_ctl_states[0];
463 | 	}
464 | 	for(int i=0; i<2; i++)
465 | 	{
466 | 		sync_buffer->hblank_ch0[j] = guardband_states[0];
467 | 		sync_buffer->hblank_ch1[j] = guardband_states[1]; 
468 | 		sync_buffer->hblank_ch2[j] = guardband_states[0];
469 | 
470 | 		sync_buffer->vblank_en_ch0[j] = guardband_states[0];
471 | 		sync_buffer->vblank_en_ch1[j] = guardband_states[1];
472 | 		sync_buffer->vblank_en_ch2[j] = guardband_states[0];
473 | 
474 | 		sync_buffer->vblank_syn_ch0[j] = guardband_states[0];
475 | 		sync_buffer->vblank_syn_ch1[j] = guardband_states[1];
476 | 		sync_buffer->vblank_syn_ch2[j] = guardband_states[0];
477 | 
478 | 		sync_buffer->vblank_ex_ch0[j] = guardband_states[0];
479 | 		sync_buffer->vblank_ex_ch1[j] = guardband_states[1];
480 | 		sync_buffer->vblank_ex_ch2[j++] = guardband_states[0];
481 | 	}
482 | 
483 | 	char buffer_name[] = "nd";
484 | 	create_sync_files(buffer_name, sync_buffer);
485 | 
486 | 	return;
487 | }
488 | 
// Packs a single channel into a buffer. Used to reduce copy and pasting.
//   in_buffer:   unpacked TMDS symbols, one 10-bit symbol per uint16_t;
//                buffer_size * 16 entries are read.
//   out_buffer:  receives buffer_size * 5 packed 32-bit words.
//   buffer_size: number of 16-symbol groups (16 symbols * 10 bits = 5 words).
// Symbols are packed least-significant-bit first: symbol 0 occupies bits 0-9 of word 0,
// symbol 1 bits 10-19, and so on, with symbols straddling word boundaries as needed.
// Only the low 10 bits of each input entry are used.
//
// The previous hand-unrolled version read the 16th symbol of each group without
// advancing its input index, so every group after the first started one symbol early.
void pack_buffer_single(uint16_t *in_buffer, uint32_t *out_buffer, int buffer_size)
{
	const int symbol_count = buffer_size * 16;
	uint64_t bit_queue = 0; // Bits waiting to be written out, oldest in the low bits
	int queued_bits = 0;
	int out_pos = 0;

	for(int in_pos=0; in_pos<symbol_count; in_pos++)
	{
		bit_queue |= ((uint64_t)(in_buffer[in_pos] & 0x3ff)) << queued_bits;
		queued_bits += 10;
		if(queued_bits >= 32)
		{
			out_buffer[out_pos++] = (uint32_t)bit_queue;
			bit_queue >>= 32;
			queued_bits -= 32;
		}
	}
	return;
}
530 | 
531 | // Creates the files for the hblank stuff.
532 | // Copying and pasting is the bane of my existance but at the moment I don't know a better way to do this.
533 | // Also packs the data from the sync buffers. 16 10-bit TMDS words fit into 5 32-bit words.
534 | // There are 192 TMDS words per buffer channel, so they would fit it ((192/16)=12)*5 = 60 32-bit words.
535 | // All variations take up a total of 2880 bytes in RAM.
536 | void create_sync_files(char *name, struct sync_buffer_t *sync_buffer)
537 | {
538 | 	struct sync_buffer_32_t *pack_buffer = (struct sync_buffer_32_t *)malloc(sizeof(struct sync_buffer_32_t));
539 | 
540 | 	allocate_sync_buffer_32(&(pack_buffer->hblank_ch0));
541 | 	allocate_sync_buffer_32(&(pack_buffer->hblank_ch1));
542 | 	allocate_sync_buffer_32(&(pack_buffer->hblank_ch2));
543 | 
544 | 	allocate_sync_buffer_32(&(pack_buffer->vblank_en_ch0));
545 | 	allocate_sync_buffer_32(&(pack_buffer->vblank_en_ch1));
546 | 	allocate_sync_buffer_32(&(pack_buffer->vblank_en_ch2));
547 | 
548 | 	allocate_sync_buffer_32(&(pack_buffer->vblank_syn_ch0));
549 | 	allocate_sync_buffer_32(&(pack_buffer->vblank_syn_ch1));
550 | 	allocate_sync_buffer_32(&(pack_buffer->vblank_syn_ch2));
551 | 
552 | 	allocate_sync_buffer_32(&(pack_buffer->vblank_ex_ch0));
553 | 	allocate_sync_buffer_32(&(pack_buffer->vblank_ex_ch1));
554 | 	allocate_sync_buffer_32(&(pack_buffer->vblank_ex_ch2));
555 | 
556 | 	// 16 TMDS words fit into 5 32-bit words. There are 192 pixels during hblank in total, so the buffers are 60 words each.
557 | 	pack_buffer_single(sync_buffer->hblank_ch0, pack_buffer->hblank_ch0, 12);
558 | 	pack_buffer_single(sync_buffer->hblank_ch1, pack_buffer->hblank_ch1, 12);
559 | 	pack_buffer_single(sync_buffer->hblank_ch1, pack_buffer->hblank_ch2, 12);
560 | 
561 | 	pack_buffer_single(sync_buffer->vblank_en_ch0, pack_buffer->vblank_en_ch0, 12);
562 | 	pack_buffer_single(sync_buffer->vblank_en_ch1, pack_buffer->vblank_en_ch1, 12);
563 | 	pack_buffer_single(sync_buffer->vblank_en_ch2, pack_buffer->vblank_en_ch2, 12);
564 | 
565 | 	pack_buffer_single(sync_buffer->vblank_syn_ch0, pack_buffer->vblank_syn_ch0, 12);
566 | 	pack_buffer_single(sync_buffer->vblank_syn_ch1, pack_buffer->vblank_syn_ch1, 12);
567 | 	pack_buffer_single(sync_buffer->vblank_syn_ch2, pack_buffer->vblank_syn_ch2, 12);
568 | 
569 | 	pack_buffer_single(sync_buffer->vblank_ex_ch0, pack_buffer->vblank_ex_ch0, 12);
570 | 	pack_buffer_single(sync_buffer->vblank_ex_ch1, pack_buffer->vblank_ex_ch1, 12);
571 | 	pack_buffer_single(sync_buffer->vblank_ex_ch2, pack_buffer->vblank_ex_ch2, 12);
572 | 
573 |     free_sync_buffers(sync_buffer); // Frees the struct too. Works properly.
574 | 
575 |     char file_name[32];
576 | 	
577 | 	sprintf(file_name, "hblank_ch0_%s.bin", name);
578 | 	FILE *hblank_ch0 = fopen(file_name, "wb");
579 | 	fwrite(pack_buffer->hblank_ch0, 4, 60, hblank_ch0);
580 | 	fclose(hblank_ch0);
581 | 
582 | 	sprintf(file_name, "hblank_ch1_%s.bin", name);
583 | 	FILE *hblank_ch1 = fopen(file_name, "wb");
584 | 	fwrite(pack_buffer->hblank_ch1, 4, 60, hblank_ch1);
585 | 	fclose(hblank_ch1);
586 | 
587 | 	sprintf(file_name, "hblank_ch2_%s.bin", name);
588 | 	FILE *hblank_ch2 = fopen(file_name, "wb");
589 | 	fwrite(pack_buffer->hblank_ch2, 4, 60, hblank_ch2);
590 | 	fclose(hblank_ch2);
591 | 	// Enter
592 | 	sprintf(file_name, "vblank_en_ch0_%s.bin", name);
593 | 	FILE *vblank_en_ch0 = fopen(file_name, "wb");
594 | 	fwrite(pack_buffer->vblank_en_ch0, 4, 60, vblank_en_ch0);
595 | 	fclose(vblank_en_ch0);
596 | 
597 | 	sprintf(file_name, "vblank_en_ch1_%s.bin", name);
598 | 	FILE *vblank_en_ch1 = fopen(file_name, "wb");
599 | 	fwrite(pack_buffer->vblank_en_ch1, 4, 60, vblank_en_ch1);
600 | 	fclose(vblank_en_ch1);
601 | 
602 | 	sprintf(file_name, "vblank_en_ch2_%s.bin", name);
603 | 	FILE *vblank_en_ch2 = fopen(file_name, "wb");
604 | 	fwrite(pack_buffer->vblank_en_ch2, 4, 60, vblank_en_ch2);
605 | 	fclose(vblank_en_ch2);
606 | 	// Sync
607 | 	sprintf(file_name, "vblank_syn_ch0_%s.bin", name);
608 | 	FILE *vblank_syn_ch0 = fopen(file_name, "wb");
609 | 	fwrite(pack_buffer->vblank_syn_ch0, 4, 60, vblank_syn_ch0);
610 | 	fclose(vblank_syn_ch0);
611 | 
612 | 	sprintf(file_name, "vblank_syn_ch1_%s.bin", name);
613 | 	FILE *vblank_syn_ch1 = fopen(file_name, "wb");
614 | 	fwrite(pack_buffer->vblank_syn_ch1, 4, 60, vblank_syn_ch1);
615 | 	fclose(vblank_syn_ch1);
616 | 
617 | 	sprintf(file_name, "vblank_syn_ch2_%s.bin", name);
618 | 	FILE *vblank_syn_ch2 = fopen(file_name, "wb");
619 | 	fwrite(pack_buffer->vblank_syn_ch2, 4, 60, vblank_syn_ch2);
620 | 	fclose(vblank_syn_ch2);
621 | 	// Exit
622 | 	sprintf(file_name, "vblank_ex_ch0_%s.bin", name);
623 | 	FILE *vblank_ex_ch0 = fopen(file_name, "wb");
624 | 	fwrite(pack_buffer->vblank_ex_ch0, 4, 60, vblank_ex_ch0);
625 | 	fclose(vblank_ex_ch0);
626 | 
627 | 	sprintf(file_name, "vblank_ex_ch1_%s.bin", name);
628 | 	FILE *vblank_ex_ch1 = fopen(file_name, "wb");
629 | 	fwrite(pack_buffer->vblank_ex_ch1, 4, 60, vblank_ex_ch1);
630 | 	fclose(vblank_ex_ch1);
631 | 
632 | 	sprintf(file_name, "vblank_ex_ch2_%s.bin", name);
633 | 	FILE *vblank_ex_ch2 = fopen(file_name, "wb");
634 | 	fwrite(pack_buffer->vblank_ex_ch2, 4, 60, vblank_ex_ch2);
635 | 	fclose(vblank_ex_ch2);
636 | 
637 | 	free_sync_buffers_32(pack_buffer);
638 | 	// Program never gets to this point because only the first array of the sync_buffer_32_t pack_buffer can be freed (hblank_ch0)
639 | 	return;
640 | }
641 | 
// First TMDS encoding stage, XOR variant. Bits are processed LSB first (little endian):
//   q[0] = D[0],  q[i] = q[i-1] XOR D[i]  for i = 1..7
// Input: 8-bit color value.
// Returns: q[7:0] in bits 7..0, with bit 8 set to flag that the XOR chain was used.
uint16_t tmds_xor(uint8_t color_data)
{
	// q[0] is the input LSB passed straight through.
	uint16_t tmds_word = color_data&0x01;
	for(int bit=1; bit<8; bit++)
	{
		// Each output bit is the previous output bit XORed with the matching input bit.
		uint16_t prev_q = (tmds_word>>(bit-1))&0x01;
		uint16_t this_d = (color_data>>bit)&0x01;
		tmds_word |= (uint16_t)((prev_q^this_d)<<bit);
	}
	// Exactly 7 chained bits are produced above; an 8th iteration would push q[0] out of the result.
	tmds_word |= 0x100;
	return tmds_word;
}
662 | 
// First TMDS encoding stage, XNOR variant. Bits are processed LSB first:
//   q[0] = D[0],  q[i] = NOT(q[i-1] XOR D[i])  for i = 1..7
// Input: 8-bit color value.
// Returns: q[7:0] in bits 7..0; bit 8 is left clear to flag that the XNOR chain was used.
uint16_t tmds_xnor(uint8_t color_data)
{
	// q[0] is the input LSB passed straight through.
	uint16_t tmds_word = color_data&0x01;
	for(int bit=1; bit<8; bit++)
	{
		// Each output bit is the inverted XOR of the previous output bit and the matching input bit.
		uint16_t prev_q = (tmds_word>>(bit-1))&0x01;
		uint16_t this_d = (color_data>>bit)&0x01;
		tmds_word |= (uint16_t)(((prev_q^this_d)^0x01)<<bit);
	}
	// Exactly 7 chained bits are produced above; an 8th iteration would push q[0] out of the result.
	return tmds_word;
}
678 | 
// Returns how many of the 8 bits in color_data are set (0..8).
int ones_count(uint8_t color_data)
{
	int set_bits = 0;
	// Peel bits off the bottom until nothing is left; higher zero bits add nothing to the total.
	for(unsigned remaining=color_data; remaining!=0; remaining>>=1)
	{
		set_bits += (int)(remaining&0x01u);
	}
	return set_bits;
}
691 | 
692 | //disparity is a 4-bit signed integer converted to a 4-bit unsigned integer
693 | //Current LUT has 2 words per entry: one for the 3 TMDS words it outputs for the same pixel, and one for the resulting disparity.
694 | void tmds_calc_disparity(struct tmds_pixel_t *tmds_pixel)
695 | {
696 | 	int this_disparity = tmds_pixel->disparity;
697 | 	int ones_cnt = ones_count(tmds_pixel->color_data);
698 | 	int zeros_cnt = 8-ones_cnt;
699 | 	uint16_t tmds_word = 0;
700 | 	// Is there an excess of ones or is bit 0 equal to 0 and ones_cnt is equal to 4?
701 | 	if(ones_cnt>4 || ((((tmds_pixel->color_data)&0x01)==0) && (ones_cnt==4)))
702 | 	{
703 | 		// If yes, XNOR
704 | 		tmds_word = tmds_xnor(tmds_pixel->color_data);
705 | 	}
706 | 	else
707 | 	{
708 | 		// If no, XOR
709 | 		tmds_word = tmds_xor(tmds_pixel->color_data);
710 | 	}
711 | 	
712 | 	// Is the previous disparity equal to 0 or ones equal to zeroes (4)?
713 | 	if(ones_cnt==zeros_cnt || (tmds_pixel->disparity)==0)
714 | 	{
715 | 		// If yes,
716 | 		// Bit 9 out = bit 8 in inverted,
717 | 		// Bit 8 out = bit 8 in,
718 | 		// XOR word with bit 8 state
719 | 
720 | 		// Is bit 8 reset?
721 | 		if((tmds_word&0x100)!=0)
722 | 		{
723 | 			// If no, 
724 | 			// Reset bit 9
725 | 			// Add the number of ones minus number of zeroes
726 | 			this_disparity = this_disparity+(ones_cnt-zeros_cnt);
727 | 			tmds_word = tmds_word&0x1ff;
728 | 		}
729 | 		else
730 | 		{
731 | 			// If yes, 
732 | 			// Set bit 9 
733 | 			// Invert lower 8 bits
734 | 			// Add the number of zeroes minus number of ones
735 | 			tmds_word = tmds_word^0xff;
736 | 			tmds_word |= 0x200;
737 | 			this_disparity = this_disparity+(zeros_cnt-ones_cnt);
738 | 			// If the disparity is zero
739 | 			// If more ones than zeros: disparity is -1*ones_cnt (up to -8)
740 | 			// If more zeros than ones: disparity could be 8 (oops)
741 | 			// ONLY IF it's all zeros, which could be avoided by inverting the LSB
742 | 		}
743 | 	}
744 | 	else
745 | 	{
746 | 		// If no,
747 | 		// Is the previous disparity more than zero AND there are more ones than zeroes, OR
748 | 		// Is the previous disparity less than zero AND there are more zeroes than ones?
749 | 		if(((tmds_pixel->disparity)>0 && ones_cnt>4) || ((tmds_pixel->disparity)<0 && ones_cnt<4))
750 | 		{
751 | 			// If yes,
752 | 			// Set bit 9
753 | 			// Invert lower 8 bits
754 | 			// Add the number of zeros minus number of ones
755 | 			// Add 2 to disparity if bit 8 is set
756 | 			tmds_word = tmds_word&0x1ff;
757 | 			tmds_word = tmds_word^0xff;
758 | 			tmds_word |= 0x200;
759 | 			this_disparity = this_disparity+(zeros_cnt-ones_cnt);
760 | 			if((tmds_word&0x100)!=0)
761 | 			{
762 | 				this_disparity = this_disparity+2;
763 | 			}
764 | 		}
765 | 		else
766 | 		{
767 | 			// If no,
768 | 			// Reset bit 9
769 | 			// Add the number of ones minus the number of zeroes
770 | 			// Subtract 2 from disparity if bit 8 is reset
771 | 			tmds_word = tmds_word&0x1ff;
772 | 			this_disparity = this_disparity+(ones_cnt-zeros_cnt);
773 | 			if((tmds_word&0x100)==0)
774 | 			{
775 | 				this_disparity = this_disparity-2;
776 | 			}
777 | 		}
778 | 	}
779 | 	tmds_pixel->disparity = this_disparity;
780 | 	tmds_pixel->tmds_data = tmds_word;
781 | 
782 | 	return;
783 | }
784 | 
785 | // The disparity should be pre-initialized, in a loop.
786 | // The LUT is 16*32*2 words long, or 4096 bytes.
787 | void tmds_pixel_repeat(uint32_t *lut_buf, struct tmds_pixel_t *tmds_pixel)
788 | {
789 | 	int dispy = tmds_pixel->disparity;
790 | 	tmds_calc_disparity(tmds_pixel);
791 | 	lut_buf[(((tmds_pixel->color_data_5b)<<1)|((((uint32_t)(dispy+8))&0x0f)<<6))&0x3fe] = (uint32_t)(tmds_pixel->tmds_data);
792 | 	tmds_calc_disparity(tmds_pixel);
793 | 	lut_buf[(((tmds_pixel->color_data_5b)<<1)|((((uint32_t)(dispy+8))&0x0f)<<6))&0x3fe] |= (uint32_t)((tmds_pixel->tmds_data)<<10);
794 | 	tmds_calc_disparity(tmds_pixel);
795 | 	lut_buf[(((tmds_pixel->color_data_5b)<<1)|((((uint32_t)(dispy+8))&0x0f)<<6))&0x3fe] |= (uint32_t)((tmds_pixel->tmds_data)<<20);
796 | 	lut_buf[((((tmds_pixel->color_data_5b)<<1)|((((uint32_t)(dispy+8))&0x0f)<<6))&0x3fe)+1] = ((uint32_t)((tmds_pixel->disparity)+8))<<6;
797 | 
798 | 	return;
799 | }
800 | 
// Widens a GBC/GBA 5-bit-per-channel color to 8 bits with no color correction:
// the 5 input bits become the top 5 output bits and the top 3 input bits are repeated at the bottom.
// 0x00 and 0xff are nudged to 0x01 / 0xfe so the disparity stays inside the signed 4-bit limit.
uint8_t depth_convert(uint8_t c_in)
{
	const uint8_t top_bits = (uint8_t)(c_in<<3);
	const uint8_t low_bits = (uint8_t)((c_in>>2)&0x07);
	uint8_t widened = (uint8_t)(top_bits|low_bits);

	switch(widened)
	{
		case 0x00:
		case 0xff:
			// Flip the LSB of the two extreme values.
			widened ^= 0x01;
			break;
		default:
			break;
	}
	return widened;
}
812 | 
813 | void create_avi_infoframe()
814 | {
815 | 	struct infoframe_header_t *packet_header = (struct infoframe_header_t *)malloc(sizeof(struct infoframe_header_t));
816 | 	struct infoframe_header_t *packet_header_v = (struct infoframe_header_t *)malloc(sizeof(struct infoframe_header_t));
817 |     struct infoframe_packet_t *info_packet = (struct infoframe_packet_t *)malloc(sizeof(struct infoframe_packet_t));
818 | 
819 | 	packet_header->terc4_r_header = (uint16_t *)malloc(32*sizeof(uint16_t));
820 | 	packet_header->terc4_en_header = (uint32_t *)malloc(10*sizeof(uint32_t));
821 | 	packet_header_v->terc4_r_header = (uint16_t *)malloc(32*sizeof(uint16_t));
822 | 	packet_header_v->terc4_en_header = (uint32_t *)malloc(10*sizeof(uint32_t));
823 | 
824 | 	info_packet->terc4_r_ch1 = (uint16_t *)malloc(32*sizeof(uint16_t));
825 | 	info_packet->terc4_en_ch1 = (uint32_t *)malloc(10*sizeof(uint32_t));
826 | 	info_packet->terc4_r_ch2 = (uint16_t *)malloc(32*sizeof(uint16_t));
827 | 	info_packet->terc4_en_ch2 = (uint32_t *)malloc(10*sizeof(uint32_t));
828 | 	info_packet->packet_data  = (uint8_t *)malloc(31);
829 | 
830 | 	packet_header->packet_type = AVI_PACKET_TYPE;
831 | 	packet_header->version = HDMI_VERSION;
832 | 	packet_header->packet_length = AVI_PACKET_LENGTH;
833 | 	packet_header->header_checksum = AVI_HEADER_CHECKSUM;
834 | 
835 | 	packet_header_v->packet_type = AVI_PACKET_TYPE;
836 | 	packet_header_v->version = HDMI_VERSION;
837 | 	packet_header_v->packet_length = AVI_PACKET_LENGTH;
838 | 	packet_header_v->header_checksum = AVI_HEADER_CHECKSUM;
839 | 
840 | 	info_packet->packet_checksum = 0x02; // VIC
841 | 	for(int i=0; i<31; i++)
842 | 	{
843 | 		info_packet->packet_data[i] = 0;
844 | 	}
845 | 	info_packet->packet_data[3] = 0x02;
846 | 
847 | 	uint8_t header_byte = packet_header->packet_type;
848 | 	int j = 0;
849 | 	for(int i=0; i<8; i++)
850 | 	{
851 | 		packet_header->terc4_r_header[j] = terc4_table[((header_byte&0x01)<<2)|sync_masks[1]];
852 | 		packet_header_v->terc4_r_header[j++] = terc4_table[((header_byte&0x01)<<2)|sync_masks[0]];
853 | 		header_byte = header_byte>>1;
854 | 	}
855 | 	header_byte = packet_header->version;
856 | 	for(int i=0; i<8; i++)
857 | 	{
858 | 		packet_header->terc4_r_header[j] = terc4_table[((header_byte&0x01)<<2)|sync_masks[1]];
859 | 		packet_header_v->terc4_r_header[j++] = terc4_table[((header_byte&0x01)<<2)|sync_masks[0]];
860 | 		header_byte = header_byte>>1;
861 | 	}
862 | 	header_byte = packet_header->packet_length;
863 | 	for(int i=0; i<8; i++)
864 | 	{
865 | 		packet_header->terc4_r_header[j] = terc4_table[((header_byte&0x01)<<2)|sync_masks[1]];
866 | 		packet_header_v->terc4_r_header[j++] = terc4_table[((header_byte&0x01)<<2)|sync_masks[0]];
867 | 		header_byte = header_byte>>1;
868 | 	}
869 | 	header_byte = packet_header->header_checksum;
870 | 	for(int i=0; i<8; i++)
871 | 	{
872 | 		packet_header->terc4_r_header[j] = terc4_table[((header_byte&0x01)<<2)|sync_masks[1]];
873 | 		packet_header_v->terc4_r_header[j++] = terc4_table[((header_byte&0x01)<<2)|sync_masks[0]];
874 | 		header_byte = header_byte>>1;
875 | 	}
876 | 
877 | 	info_packet->terc4_r_ch1[0] = terc4_table[((info_packet->packet_checksum)&0x0f)];
878 | 	info_packet->terc4_r_ch2[0] = terc4_table[((info_packet->packet_checksum)&0xf0)>>4];
879 | 	for(int i=1; i<32; i++)
880 | 	{
881 | 		info_packet->terc4_r_ch1[i] = terc4_table[((info_packet->packet_data[i-1])&0x0f)];
882 | 		info_packet->terc4_r_ch2[i] = terc4_table[((info_packet->packet_data[i-1])&0xf0)>>4];
883 | 	}
884 | 
885 | 	pack_buffer_single(packet_header->terc4_r_header, packet_header->terc4_en_header, 2);
886 | 	pack_buffer_single(packet_header_v->terc4_r_header, packet_header_v->terc4_en_header, 2);
887 | 	pack_buffer_single(info_packet->terc4_r_ch1, info_packet->terc4_en_ch1, 2);
888 | 	pack_buffer_single(info_packet->terc4_r_ch2, info_packet->terc4_en_ch2, 2);
889 | 
890 | 	FILE *terc4_header = fopen("terc4_hblank_ch0.bin", "wb");
891 | 	fwrite(packet_header->terc4_en_header, 4, 10, terc4_header);
892 | 	fclose(terc4_header);
893 | 
894 | 	FILE *terc4_header_v = fopen("terc4_vsync_ch0.bin", "wb");
895 | 	fwrite(packet_header_v->terc4_en_header, 4, 10, terc4_header_v);
896 | 	fclose(terc4_header_v);
897 | 
898 | 	FILE *terc4_ch1 = fopen("terc4_blank_ch1.bin", "wb");
899 | 	fwrite(info_packet->terc4_en_ch1, 4, 10, terc4_ch1);
900 | 	fclose(terc4_ch1);
901 | 
902 | 	FILE *terc4_ch2 = fopen("terc4_blank_ch2.bin", "wb");
903 | 	fwrite(info_packet->terc4_en_ch2, 4, 10, terc4_ch2);
904 | 	fclose(terc4_ch2);
905 | 
906 | 	free(packet_header->terc4_r_header);
907 | 	free(packet_header->terc4_en_header);
908 | 	free(packet_header);
909 | 
910 | 	free(packet_header_v->terc4_r_header);
911 | 	free(packet_header_v->terc4_en_header);
912 | 	free(packet_header_v);
913 | 
914 | 	free(info_packet->terc4_r_ch1);
915 | 	free(info_packet->terc4_en_ch1);
916 | 	free(info_packet->terc4_r_ch2);
917 | 	free(info_packet->terc4_en_ch2);
918 | 	free(info_packet->packet_data);
919 | 	free(info_packet);
920 | 
921 | 	return;
922 | }
923 | 
924 | // Requires at least the 5 bit color of the tmds pixel to be initialized.
925 | // Creates a file with the data inside it.
926 | void create_solid_line(char *name, struct tmds_pixel_t *pixel)
927 | {
928 | 	uint16_t *tmds_r_line = (uint16_t *)malloc(720*sizeof(uint16_t));
929 | 	uint32_t *tmds_en_line = (uint32_t *)malloc(225*sizeof(uint32_t));
930 | 	pixel->color_data = depth_convert(pixel->color_data_5b);
931 | 	for(int i=0; i<720; i++)
932 | 	{
933 | 		tmds_calc_disparity(pixel);
934 | 		tmds_r_line[i] = pixel->tmds_data;
935 | 	}
936 | 	pack_buffer_single(tmds_r_line, tmds_en_line, 45);
937 | 	free(tmds_r_line);
938 | 
939 | 	FILE *tmds_line = fopen(name, "wb");
940 | 	fwrite(tmds_en_line, 4, 225, tmds_line);
941 | 	fclose(tmds_line);
942 | 	free(tmds_en_line);
943 | 
944 | 	return;
945 | }
946 | 


--------------------------------------------------------------------------------