├── .gitignore
├── LICENSE.md
├── README.md
├── RISC-V.hw
    └── RISC-V.lpr
├── RISC-V.ip_user_files
    └── README.txt
├── RISC-V.srcs
    └── asynchronous
    │   ├── acib
    │       └── README.md
    │   ├── cpu
    │       ├── ALU
    │       │   ├── I
    │       │   │   ├── and.vhdl
    │       │   │   └── shift.vhdl
    │       │   ├── README.md
    │       │   ├── alu.vhdl
    │       │   └── insn
    │       │   │   └── insn_two_register.vhdl
    │       ├── README.md
    │       ├── adders
    │       │   ├── adder.vhdl
    │       │   ├── adders.md
    │       │   └── shcadder.vhdl
    │       ├── async_component_block_diagram.png
    │       ├── pipelines
    │       │   ├── README.md
    │       │   ├── ooe-pipeline.md
    │       │   ├── simple-pipeline.md
    │       │   └── simple-pipeline.vhdl
    │       ├── roadmap.md
    │       └── shifters
    │       │   ├── barrel_shifter.vhdl
    │       │   └── barrel_shifter_no_signex.vhdl
    │   └── infrastructure
    │       ├── handshake
    │           ├── README.md
    │           ├── handshake.vhdl
    │           ├── ncl_async_logical_not.png
    │           ├── ncl_async_register.png
    │           └── register.vhdl
    │       ├── ncl
    │           ├── README.md
    │           └── ncl.vhdl
    │       └── transceiver
    │           ├── transceiver_async_to_sync.vhdl
    │           └── transceiver_sync_to_async.vhdl
└── RISC-V.xpr


/.gitignore:
--------------------------------------------------------------------------------
1 | .~
2 | *.cache/
3 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | # Derivatives of Other Works
 2 | 
 3 | Works herein derived from other works with incompatible licenses are subject
 4 | to the terms of those respective licenses.
 5 | 
 6 | # Original Works in this Repository
 7 | 
 8 | The MIT License
 9 | 
10 | Copyright (c) 2020 Moonset Technologies, LLC
11 | 
12 | Permission is hereby granted, free of charge, to any person obtaining a copy
13 | of this software and associated documentation files (the "Software"), to deal
14 | in the Software without restriction, including without limitation the rights
15 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16 | copies of the Software, and to permit persons to whom the Software is
17 | furnished to do so, subject to the following conditions:
18 | 
19 | The above copyright notice and this permission notice shall be included in
20 | all copies or substantial portions of the Software.
21 | 
22 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
28 | THE SOFTWARE.
29 | 
30 | # Expiration
31 | 
32 | All works in this repository legally covered by the above license shall remain
33 | so for a term of seven (7) years after initial publication.  Updates to such
34 | works shall begin their own separate licensing term at the time of their own
35 | publication.  Nothing in this license shall be construed to extend the
36 | licensing term on any version of any work covered herein.
37 | 
38 | Upon expiration of the licensing term, all in this repository legally covered
39 | by the above license shall immediately become licensed under the Creative
40 | Commons "CC0" license and shall simultaneously be declared as in the public
41 | domain by the common law meaning in the United States as understood in 2020.
42 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | RISC-V Soft CPU
 2 | ===============
 3 | 
 4 | The RISC-V Soft CPU provides various CPU packages.
 5 | 
 6 | # RV32/64IM
 7 | 
 8 | This is a small-footprint, embedded processor conformant to the following
 9 | RISC-V standards:
10 | 
11 | * RV32/64I 2.1
12 | * M 2.0
13 |   * Uses FPGA multipliers
14 |   * Paravartya integer division implementation
15 | 
16 | The FPGA implementation of RISC-V is likely unaffected by registers, as LUT
17 | registers are almost never a resource constraint and BRAM is often plentiful.
18 | 
19 | 64-bit extension instructions add 15 RV64I and 5 RV64M instructions.
20 | 
21 | This implements only the M machine mode privileged system, and has the
22 | following MISA flags available:
23 | 
24 | * E
25 | * I
26 | * M
27 | 
28 | This core is suitable for embedded environments, notably for the Retro-1 BIOS
29 | implementation.  UEFI always runs in M mode on the BIOS embedded CPU.  This
30 | core implements no pipelines, simple adders, and synchronous operations to
31 | minimize size.
32 | 
33 | # RV32/64IM-Counters-Zicsr-Zifencei
34 | 
35 | This extends the RV32/64IM with the following:
36 | 
37 | * Counters 2.0 Draft
38 |   * Draft for counters
39 |   * Cycle counter uses adder loop when non-retired instructions in pipeline:  adder increments counter CSR
40 | * Zifencei
41 | 
42 | This core also implements the M, S, and U privilege levels, and so implements
43 | MISA flags:
44 | 
45 | * E
46 | * I
47 | * M
48 | * S
49 | * U
50 | 
51 | This core is suitable for running Linux or Minix operating systems.
52 | 
53 | This core implements simple pipelines, Han-Carlson adders, and NULL Convention
54 | Logic for asynchronous execution.  It eschews floating point due to large area
55 | usage.
56 | 
57 | # RV32/64IMAFDQC-Counters-Zicsr-Zifencei-Hypervisor
58 | 
59 | This extends the RV32/64IM core with floating point and hypervisor support.
60 | This is a *large* core implementing as much logic as possible as NCL.
61 | 
62 | This core does not exclude simultaneous multithreading (SMT), out-of-order
63 | execution (OOE), speculative execution, runahead, and so forth.  It includes
64 | custom counters to determine which facilities stall the most (e.g. contention
65 | for adders, multipliers, registers in register renaming) to guide customized
66 | implementation.
67 | 
68 | 
69 | 


--------------------------------------------------------------------------------
/RISC-V.hw/RISC-V.lpr:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <!-- Product Version: Vivado v2019.2 (64-bit)                     -->
3 | <!--                                                              -->
4 | <!-- Copyright 1986-2019 Xilinx, Inc. All Rights Reserved.        -->
5 | 
6 | <labtools version="1" minor="0"/>
7 | 


--------------------------------------------------------------------------------
/RISC-V.ip_user_files/README.txt:
--------------------------------------------------------------------------------
1 | The files in this directory structure are automatically generated and managed by Vivado. Editing these files is not recommended.
2 | 


--------------------------------------------------------------------------------
/RISC-V.srcs/asynchronous/acib/README.md:
--------------------------------------------------------------------------------
  1 | Asynchronous Component Interface Bus
  2 | ====================================
  3 | 
  4 | This describes an Asynchronous Component Interface Bus (ACIB), a bus to
  5 | connect components with an asynchronous communications protocol.
  6 | 
  7 | This is an extreme rough draft.
  8 | 
  9 | # Asynchronous Differential Null Convention Coding
 10 | 
 11 | ACIB uses Asynchronous Differential Null Convention Coding (ADNCC) to transmit
 12 | data.
 13 | 
 14 | ADNCC uses a serial differential pair to transmit a NULL Convention Logic (NCL)
 15 | signal from a sender to a receiver.  Unlike NCL communications internal to a
 16 | given IC, ACIB is serial and self-timing.  Like all component buses, it uses
 17 | signal negotiation and data error detection and correction to preserve data
 18 | integrity.  This can create more latency, but high throughput, which is more
 19 | appropriate for communications between components.
 20 | 
 21 | ## Differential Pairs
 22 | 
 23 | ADNCC communicates via differential pairs.  Each pair has a fixed positive `p`
 24 | and a differential negative `n` rail carrying an NCL bit of `[p n]`.  The
 25 | rails are considered equivalent with a NULL value of `[0 0]` when within a
 26 | defined voltage of one another; otherwise, the more-negative rail is the
 27 | `1` bit.
 28 | 
 29 | Given a base voltage of 0 on both rails and a 50mV standard signal
 30 | differential, the signal would read as follows:
 31 | 
 32 | | `p` | `n`   | NCL     | Value
 33 | | ---:| -----:| -------:| ----:
 34 | |   0 | +50mV | `[1 0]` | `0`
 35 | |   0 |   0mV | `[0 0]` | `NULL`
 36 | |   0 | -50mV | `[0 1]` | `1`
 37 | 
 38 | To transmit multiple `0` or `1` bits in series, the sender must transition to
 39 | `NULL` between each bit; whereas a transition directly between `0` and `1`
 40 | is always accepted for several reasons:
 41 | 
 42 | * The `p` rail is constant, so there's no chance of a glitch from propagation
 43 | delay on `p`;
 44 | * A transition between `0` and `1` on the `n` line must necessarily pass
 45 | through `NULL`, which would only signal that the next non-`NULL` value is
 46 | an intended data bit, and so is implicit; and
 47 | * If the `n` line can spuriously transition between `0` and `1`, the circuit
 48 | can spuriously transition between `NULL` and non-`NULL`, and no data integrity
 49 | is possible.
 50 | 
 51 | This together means there is no value in requiring a `NULL` between valid and
 52 | distinct `0` and `1` signals; rather a transition between *any* of the three
 53 | states is valid, and the `NULL` state is just not data and not recorded.
 54 | 
 55 | ## Voltage Characteristics
 56 | 
 57 | ADNCC uses a variable transmission voltage.  Any voltage differential between
 58 | 50mV and 300mV is acceptable, negotiated between the two endpoints.  `NULL` is
 59 | always *sent* as `+0mV`, and the threshold for transition to `NULL` is 1/3 the
 60 | voltage differential, while transition to not-`NULL` is 2/3 the voltage.
 61 | 
 62 | (FIXME:  is that reasonable thresholding?)
 63 | 
 64 | Implementations are not required to support all voltages.  Implementations
 65 | *must* support each of `p`+/-50mV, `p`+/-150mV, and `p`+/-300mV.
 66 | 
 67 | ## Signal Negotiation
 68 | 
 69 | Bus protocols over ADNCC must use packet error detection and correction, as
 70 | is the case with most modern bus protocols.  Bus protocols using ADNCC may
 71 | respond to error rate by:
 72 | 
 73 | * Implementing error-correcting coding;
 74 | * Negotiating a different voltage differential; or
 75 | * Clocking the sender.
 76 | 
 77 | If a voltage differential of 300mV does not result in a low-error connection,
 78 | the sender may physically delay each transition to align with a clock signal,
 79 | varying this clock and the voltage differential to achieve optimal transmission
 80 | rate.  The receiver doesn't concern itself with the error rate.
 81 | 
 82 | ## Signal Transmission Rate
 83 | 
 84 | ADNCC provides asynchronous transmission of digital signals.  Bus protocols
 85 | using ADNCC must negotiate packet size and manage error over this coding.
 86 | 
 87 | ADNCC will operate at higher or lower baud rate based on cable length,
 88 | temperature, encoding and decoding hardware, and other characteristics.  It
 89 | is delay-insensitive, but requires readable signal.
 90 | 
 91 | Data transmission may, in some cases, overwhelm the capabilities of the
 92 | receiver in buffering and processing the data.  This causes errors unrelated
 93 | to the transmission protocol, but rather to the sheer volume of data received.
 94 | Bus protocols using ADNCC must handle these errors either by negotiating
 95 | packet size and transmission rate or by slowing down the data transmission as
 96 | in any other error condition.
 97 | 
 98 | # Asynchronous Component Interface Bus
 99 | 
100 | Asynchronous Component Interface Bus (ACIB) uses ADNCC to provide an
101 | asynchronous data bus.
102 | 
103 | ## Electrical Characteristics
104 | 
105 | ACIB uses two types of connectors:  a 20-pin interface and ...
106 | 
107 | ### 20-pin connector
108 | 
109 | The 20-pin connector is pin-compatible with Type-C, including a maximum 100W
110 | power delivery and four differential pairs.  The differential pairs must
111 | operate as ADNCC in ACIB mode.
112 | 
113 | ### X-pin connector
114 | 
115 | TBD:  Number of pairs, power characteristics.
116 | 
117 | ## Bus protocol
118 | 
119 | XXX:  Bus protocol
120 | 
121 | Packets have specific connection ID attached to them.
122 | 
123 | DMA is negotiated to specific memory areas via a memory controller.
124 | 
125 | ### Error Correction
126 | 
127 | ACIB uses a fast, variable Reed-Solomon coding to correct for errors, as well
128 | as variation of the ADNCC physical layer.
129 | 
130 | TBD:  Specific RS Coding, fast hardware implementation.
131 | 
132 | ## Implementation considerations
133 | 
134 | ACIB transceivers may support multiple devices and simultaneous communication
135 | with the host.  Such devices may use multiplexers in a one-to-many or
136 | many-to-many configuration to allow simultaneous communication.
137 | 
138 | ACIB controls the communication between two ACIB devices.  ADNCC does not
139 | negotiate asynchronous data flow, but only uses delay-insensitive data
140 | transmission.  ACIB can delay data flow by delaying an acknowledgement of
141 | readiness for a packet.
142 | 
143 | An ACIB transceiver may interface asynchronously with the host device via a
144 | handshake protocol, notably when the ACIB transceiver is integrated into a
145 | SoC.  This automatically manages behavior related to data transfer and
146 | processing capability:  if the ACIB transceiver can buffer all data packets
147 | it requests or accepts, then it can wait to acknowledge pending requests or
148 | make new requests simply by waiting until its internal buffers are flushed.
149 | If this happens over an asynchronous handshake protocol, then the transceiver
150 | waits precisely until the host receives and acknowledges its receipt of the
151 | buffered data.
152 | 


--------------------------------------------------------------------------------
/RISC-V.srcs/asynchronous/cpu/ALU/I/and.vhdl:
--------------------------------------------------------------------------------
  1 | -- vim: sw=4 ts=4 et
  2 | -- AND/OR/XOR of two registers, or a register and a sign-extended 12-bit immediate
  3 | library IEEE;
  4 | use IEEE.std_logic_1164.all;
  5 | library async_ncl;
  6 | use async_ncl.ncl.all;
  7 | use work.e_ncl_logic_register;
  8 | use work.e_riscv_insn_async_2reg_infra;
  9 | 
 10 | entity e_riscv_insn_async_2reg is
 11 |     generic ( XLEN : positive );  -- width of rs1, rs2 and Dout
 12 |     port (
 13 |         -- Receiver side:  operands and instruction in, plus handshake
 14 |         rs1  : in  ncl_logic_vector(XLEN-1 downto 0);
 15 |         rs2  : in  ncl_logic_vector(XLEN-1 downto 0);
 16 |         insn : in  ncl_logic_vector(31 downto 0);  -- always 32 bits wide
 17 |         Rr   : out std_logic;  -- receiver handshake output
 18 |         Wr   : in  std_logic;  -- receiver handshake input
 19 |         -- Sender side:  result out, plus handshake
 20 |         Dout : out ncl_logic_vector(XLEN-1 downto 0);
 21 |         Rs   : in  std_logic;  -- sender handshake input
 22 |         Ws   : out std_logic   -- sender handshake output
 23 |     );
 24 | end e_riscv_insn_async_2reg;
 25 | 
 26 | architecture riscv_i_async_bitmask of e_riscv_insn_async_2reg is
 27 |     -- Buffered copies of the inputs, driven by the r_infra instance
 28 |     signal r_rs1  : ncl_logic_vector(rs1'RANGE);
 29 |     signal r_rs2  : ncl_logic_vector(rs2'RANGE);
 30 |     signal r_insn : ncl_logic_vector(insn'RANGE);
 31 | 
 32 |     -- Data extracted from the buffered instruction
 33 |     alias opcode : ncl_logic_vector(6 downto 0)  is r_insn(6 downto 0);
 34 |     -- I-type immediate value
 35 |     alias imm    : ncl_logic_vector(11 downto 0) is r_insn(31 downto 20);
 36 |     -- R-type
 37 |     alias funct7 : ncl_logic_vector(6 downto 0)  is r_insn(31 downto 25);
 38 |     alias funct3 : ncl_logic_vector(2 downto 0)  is r_insn(14 downto 12);
 39 |     -- opcode is 0010011 if I-type, 0110011 if R-type
 40 |     alias rtype  : ncl_logic is r_insn(5);
 41 | begin
 42 | 
 43 |     -- DI registered buffer
 44 |     r_infra: entity e_riscv_insn_async_2reg_infra(riscv_insn_async_2reg_infra)
 45 |         generic map (XLEN => XLEN)
 46 |         port map
 47 |         (rs1   => rs1,
 48 |          rs2   => rs2,
 49 |          insn  => insn,
 50 |          Rr    => Rr,
 51 |          Wr    => Wr,
 52 |          Rs    => Rs,
 53 |          Ws    => Ws,
 54 |          -- Buffered registers
 55 |          rs1b  => r_rs1,
 56 |          rs2b  => r_rs2,
 57 |          insnb => r_insn,
 58 |          -- send output to 2reg infrastructure
 59 |          rdl   => Dout
 60 |         );
 61 | 
 62 |     -- TODO:
 63 |     -- instantiate an ncl_logic_register of length
 64 |     --   (rs1'LENGTH + rs2'LENGTH + insn'LENGTH)
 65 |     -- and handshake to store input into that register.
 66 |     --
 67 |     -- Rewrite slices above to use this register
 68 | 
 69 |     -- Computes AND/OR/XOR, selected by funct3, from the buffered operands.
 70 |     -- Only r_rs1/r_rs2/r_insn are read, so every branch works on the same
 71 |     -- registered data; any other encoding drives the NULL output.
 72 |     bitmask : process(all) is
 73 |     begin
 74 |         -- FIXME:  Handshake.  We need the handshake or this WILL fail.
 75 |         if ( rtype = ncl_encode('1') ) then
 76 |             -- R-type opcode
 77 |             if ((funct3(2) AND funct3(1) AND funct3(0)) = ncl_encode('1')) then
 78 |                 -- funct3 = 111 is AND
 79 |                 Dout <= r_rs1 AND r_rs2;
 80 |             elsif (    ((funct3(2) AND funct3(1)) = ncl_encode('1'))
 81 |                    AND (funct3(0) = ncl_encode('0'))) then
 82 |                 -- funct3 = 110 = or
 83 |                 Dout <= r_rs1 OR r_rs2;
 84 |             elsif (    (funct3(2) = ncl_encode('1'))
 85 |                    AND ((funct3(1) OR funct3(0)) = ncl_encode('0'))) then
 86 |                 -- funct3 = 100 = xor
 87 |                 Dout <= r_rs1 XOR r_rs2;
 88 |             else
 89 |                 -- NULL output
 90 |                 Dout <= (others => (others => '0'));
 91 |             end if;
 92 |         elsif ( rtype = ncl_encode('0') ) then
 93 |             -- I-type opcode.  The low 12 bits combine with the immediate;
 94 |             -- the remaining bits combine with its sign bit, imm(11).
 95 |             if ((funct3(2) AND funct3(1) AND funct3(0)) = ncl_encode('1')) then
 96 |                 -- funct3 = 111 is AND
 97 |                 Dout(11 downto 0) <= r_rs1(11 downto 0) AND imm;
 98 |                 -- Sign extend
 99 |                 for i in Dout'HIGH downto 12 loop
100 |                     Dout(i) <= r_rs1(i) AND imm(11);
101 |                 end loop;
102 |             elsif (    ((funct3(2) AND funct3(1)) = ncl_encode('1'))
103 |                    AND (funct3(0) = ncl_encode('0'))) then
104 |                 -- funct3 = 110 = or
105 |                 Dout(11 downto 0) <= r_rs1(11 downto 0) OR imm;
106 |                 -- Sign extend
107 |                 for i in Dout'HIGH downto 12 loop
108 |                     Dout(i) <= r_rs1(i) OR imm(11);
109 |                 end loop;
110 |             elsif (    (funct3(2) = ncl_encode('1'))
111 |                    AND ((funct3(1) OR funct3(0)) = ncl_encode('0'))) then
112 |                 -- funct3 = 100 = xor
113 |                 Dout(11 downto 0) <= r_rs1(11 downto 0) XOR imm;
114 |                 -- Sign extend
115 |                 for i in Dout'HIGH downto 12 loop
116 |                     Dout(i) <= r_rs1(i) XOR imm(11);
117 |                 end loop;
118 |             else
119 |                 -- NULL output
120 |                 Dout <= (others => (others => '0'));
121 |             end if;
122 |         else
123 |             -- NULL output
124 |             Dout <= (others => (others => '0'));
125 |         end if;
126 |     end process bitmask;
127 | end riscv_i_async_bitmask;


--------------------------------------------------------------------------------
/RISC-V.srcs/asynchronous/cpu/ALU/I/shift.vhdl:
--------------------------------------------------------------------------------
 1 | -- vim: sw=4 ts=4 et
 2 | -- Shift instructions, including:
 3 | --
 4 | -- RV32I
 5 | --   SLLI   Shift Left Logical Immediate (32)
 6 | --   SRLI   Shift Right Logical Immediate (32)
 7 | --   SRAI   Shift Right Arithmetic Immediate (32)
 8 | --   SLL    Shift Left Logical (32)
 9 | --   SRL    Shift Right Logical (32)
10 | --   SRA    Shift Right Arithmetic (32)
11 | --
12 | -- RV64I
13 | --   SLLI   Shift Left Logical Immediate (64)
14 | --   SRLI   Shift Right Logical Immediate (64)
15 | --   SRAI   Shift Right Arithmetic Immediate (64)
16 | --   SLL    Shift Left Logical (64)
17 | --   SRL    Shift Right Logical (64)
18 | --   SRA    Shift Right Arithmetic (64)
19 | --   SLLIW  SLLI (32)
20 | --   SRLIW  SRLI (32)
21 | --   SRAIW  SRAI (32)
22 | --   SLLW   SLL (32)
23 | --   SRLW   SRL (32)
24 | --   SRAW   SRA (32)
25 | --
26 | -- RV128I
27 | --   TBA
28 | 
29 | library IEEE;
30 | use IEEE.std_logic_1164.all;
31 | use work.ncl.all;
32 | 
33 | architecture riscv_insn_shift of riscv_insn is
34 | begin
35 |     -- XLEN will be 32, 64, or 128, and will instantiate a shifter
36 |     -- that many bits wide.
37 |     --
38 |     -- The barrel shifter can place bail-out circuits at each halving
39 |     -- of the bit width, e.g. with XLEN=128 and BitWidths=3, the
40 |     -- shifter can direct to output at 128, 64, or 32 bits.
41 |     barrel_shifter: entity e_barrel_shifter_ncl(a_barrel_shifter_ncl)
42 |     generic map (n             => XLEN,
43 |                  BitWidths => BitWidthCount );
44 |     -- NOTE(review): XLEN and BitWidthCount are presumably generics of riscv_insn -- confirm
45 |     -- TODO:  send current bit width mode to barrel_shifter
46 | 
47 |     -- TODO:  add a port map; the instantiation above sets generics only
48 | 
49 | end architecture;
50 | 


--------------------------------------------------------------------------------
/RISC-V.srcs/asynchronous/cpu/ALU/README.md:
--------------------------------------------------------------------------------
 1 | Arithmetic Logic Unit
 2 | =====================
 3 | 
 4 | The ALUs here implement RV32I and RV64I instructions.  Various configurations
 5 | may enable multiple copies of particular facilities (adders, multipliers,
 6 | incrementers), multi-port ALUs (for SMT or OOE), and other features.
 7 | 
 8 | ALUs execute instructions in the order and with the data they are given.
 9 | Out-of-order and speculative execution are carried out before sending
10 | instructions to the ALU.
11 | 


--------------------------------------------------------------------------------
/RISC-V.srcs/asynchronous/cpu/ALU/alu.vhdl:
--------------------------------------------------------------------------------
 1 | -- vim: sw=4 ts=4 et
 2 | --
 3 | -- Highly-conceptual rough-in, very broken
 4 | library IEEE;
 5 | use IEEE.std_logic_1164.all;
 6 | use work.ncl.all;
 7 | 
 8 | -- Carries the value destined for rd together with its instruction
 9 | entity insn_output is
10 |     generic ( XLEN : positive );
11 |     port (
12 |         -- The content to stick into rd
13 |         rd   : in ncl_logic_vector(XLEN-1 downto 0);
14 |         -- The instruction itself, which contains
15 |         -- rd at [11:7], along with all information
16 |         -- about read and write targets
17 |         insn : in ncl_logic_vector(31 downto 0)
18 |     );
19 | end insn_output;
20 | 
21 | entity insn_riscv_execution is  -- interface shared by instruction execution units
22 |     generic ( XLEN : positive );
23 |     port (
24 |         rs1    : in ncl_logic_vector(XLEN-1 downto 0);
25 |         rs2    : in ncl_logic_vector(XLEN-1 downto 0);
26 |         insn   : in ncl_logic_vector(31 downto 0);
27 |         -- FIXME:  Need all the machine registers passed in
28 |         -- some readable manner so instructions can react to
29 |         -- the machine's mode.
30 |         --
31 |         -- MISA lets us at least check
32 |         misa_r : in ncl_logic_vector(XLEN-1 downto 0);
33 |         Rt, Wr : in std_logic;   -- handshake inputs
34 |         -- rd is the actual output data
35 |         rd     : out ncl_logic_vector(XLEN-1 downto 0);
36 |         Rr, Wt : out std_logic   -- handshake outputs
37 |     );
38 | end insn_riscv_execution;
39 | 
40 | entity insn_riscv_decoder is  -- takes insn, pc, misa_r and regfile; outputs rd
41 |     generic ( XLEN : positive );
42 |     port (
43 |         insn    : in ncl_logic_vector(31 downto 0);
44 |         Rr, Wr  : in std_logic;   -- handshake inputs
45 |         pc      : in ncl_logic_vector(XLEN-1 downto 0);
46 |         misa_r  : in ncl_logic_vector(XLEN-1 downto 0);
47 |         -- Change this to actual not-crap
48 |         regfile : in ncl_logic_vector(15 downto 0);
49 |         -- rd is the actual output data
50 |         rd      : out ncl_logic_vector(XLEN-1 downto 0);
51 |         Rt, Wt  : out std_logic   -- handshake outputs
52 |     );
53 | end insn_riscv_decoder;
54 | 
55 | architecture a_insn_riscv_decoder of insn_riscv_decoder is
56 |     signal data_rs1, data_rs2 : ncl_logic_vector(XLEN-1 downto 0);
57 |     -- TODO:  declare Rinsn once its type is decided (was left unfinished)
58 | begin
59 |     -- Rough-in:  only the operand, instruction and MISA inputs are wired.
60 |     andi_insn : entity work.insn_riscv_execution(insn_riscv_andi)
61 |         generic map ( XLEN => XLEN )
62 |         port map (
63 |         rs1    => data_rs1,
64 |         rs2    => data_rs2,
65 |         insn   => insn,
66 |         misa_r => misa_r
67 |         -- TODO:  associate Rt, Wr, rd, Rr and Wt; the handshake and
68 |         );  --     result ports are not connected yet
69 | 
70 | 
71 | end a_insn_riscv_decoder;
72 | 


--------------------------------------------------------------------------------
/RISC-V.srcs/asynchronous/cpu/ALU/insn/insn_two_register.vhdl:
--------------------------------------------------------------------------------
 1 | -- vim: sw=4 ts=4 et
 2 | -- Shared infrastructure for two-register instructions:  input buffer and handshake
 3 | library IEEE;
 4 | use IEEE.std_logic_1164.all;
 5 | library async_ncl;
 6 | use async_ncl.ncl.all;
 7 | use work.e_ncl_logic_register;
 8 | use work.e_ncl_handshake_receiver;
 9 | 
10 | entity e_riscv_insn_async_2reg_infra is
11 |     generic ( XLEN : positive );  -- width of rs1, rs2, their buffers, and rdl
12 |     port (
13 |         -- Receiver port and handshake
14 |         rs1  : in  ncl_logic_vector(XLEN-1 downto 0);
15 |         rs2  : in  ncl_logic_vector(XLEN-1 downto 0);
16 |         insn : in  ncl_logic_vector(31 downto 0);
17 |         Rr   : out std_logic;  -- driven by the handshake receiver's Ready
18 |         Wr   : in  std_logic;  -- feeds the receiver's Waiting and the register's W
19 |         -- Sender handshake
20 |         Rs   : in  std_logic;
21 |         Ws   : out std_logic;
22 |         -- Logic circuit:  buffered rs1, rs2, insn
23 |         rs1b : out ncl_logic_vector(XLEN-1 downto 0);
24 |         rs2b : out ncl_logic_vector(XLEN-1 downto 0);
25 |         insnb: out ncl_logic_vector(31 downto 0);
26 |         -- result from the logic circuit
27 |         rdl  : in  ncl_logic_vector(XLEN-1 downto 0)
28 |     );
29 | end e_riscv_insn_async_2reg_infra;
30 | 
31 | architecture riscv_insn_async_2reg_infra of e_riscv_insn_async_2reg_infra is
32 |     -- rs1, rs2 and insn packed into one vector for the register input
33 |     signal Din   : ncl_logic_vector( (rs1'LENGTH
34 |                                     + rs2'LENGTH
35 |                                     + insn'LENGTH)-1 downto 0);
36 |     -- Buffered into a delay-insensitive register
37 |     signal in_buffer : ncl_logic_vector( (  rs1'LENGTH
38 |                                           + rs2'LENGTH
39 |                                           + insn'LENGTH)-1 downto 0);
40 | 
41 |     -- Views onto in_buffer, laid out low to high as rs1, rs2, insn
42 |     alias r_rs1  : ncl_logic_vector((rs1'LENGTH)-1 downto 0) is
43 |                           in_buffer((rs1'LENGTH)-1 downto 0);
44 | 
45 |     alias r_rs2  : ncl_logic_vector( (rs2'LENGTH)-1 downto 0) is
46 |                           in_buffer( (rs1'LENGTH
47 |                                     + rs2'LENGTH)-1 downto (rs1'LENGTH));
48 | 
49 |     alias r_insn : ncl_logic_vector( (insn'LENGTH)-1 downto 0) is
50 |                          in_buffer( (rs1'LENGTH
51 |                                    + rs2'LENGTH
52 |                                    + insn'LENGTH)-1 downto (rs1'LENGTH
53 |                                                           + rs2'LENGTH));
54 |     signal r_Enable : std_logic;
55 |     signal r_Clear  : std_logic;  -- not driven anywhere in this architecture yet
56 |     signal r_Stored : std_logic;
57 |     -- Receiver handshake
58 |     signal r_hs_Enable : std_logic;
59 | 
60 | begin
61 | 
62 |     -- DI registered buffer
63 |     r_buffer: entity e_ncl_logic_register(ncl_logic_register)
64 |         generic map (n => rs1'LENGTH + rs2'LENGTH + insn'LENGTH)
65 |         port map
66 |         (D      => Din,
67 |          Q      => in_buffer,
68 |          En     => r_Enable,
69 |          CLR    => r_Clear,
70 |          W      => Wr,
71 |          Stored => r_Stored
72 |         );
73 | 
74 |     -- Handshake to receive input data
75 |     hs_receiver: entity e_ncl_handshake_receiver(ncl_handshake_receiver)
76 |         port map (
77 |         Ready    => Rr,
78 |         -- Enable when nothing stored
79 |         En       => NOT r_Stored, -- FIXME:  Needs to come from the ICT component (yellow)
80 |         Waiting  => Wr,
81 |         EnOut    => r_Enable
82 |     );
83 | 
84 |     -- TODO:  Input completion test component
85 |     -- TODO:  Sender handshake component (Ws is not driven until then)
86 |     -- TODO:  Flush signal
87 |     -- TODO: Setup receiver handshake enable
88 | 
89 |     -- Set up r_buffer input signal
90 |     Din(rs1'LENGTH - 1 downto 0) <= rs1;
91 |     Din(rs1'LENGTH + rs2'LENGTH - 1 downto rs1'LENGTH) <= rs2;
92 |     Din(Din'HIGH downto rs1'LENGTH + rs2'LENGTH) <= insn;
93 | 
94 |     -- Present the buffered values to the logic circuit
95 |     rs1b  <= r_rs1;
96 |     rs2b  <= r_rs2;
97 |     insnb <= r_insn;
98 | 
99 | end riscv_insn_async_2reg_infra;


--------------------------------------------------------------------------------
/RISC-V.srcs/asynchronous/cpu/README.md:
--------------------------------------------------------------------------------
  1 | Asynchronous CPU Components
  2 | ===========================
  3 | 
  4 | These CPU components are asynchronous.  They include adders, dividers,
  5 | pipelines, and other features.
  6 | 
  7 | # Major Architecture
  8 | 
  9 | Ultimately, this RISC-V implementation will use an entirely asynchronous
 10 | architecture.  This consumes significant area, largely due to routing;
 11 | however, routing is between directly-attached components generally, and
 12 | should not be a problem in and of itself.
 13 | 
 14 | In general, an Asynchronous CPU operates in a synchronous system as below:
 15 | ```
 16 |        __________________________________________________________
 17 |       |   _______________             ________________________   |
 18 |   CLK-|--|  Transceiver  |=Handshake=| Asynchronous circuitry |  |
 19 | D0..n=|==|               |=D[0]0..n==|                        |  |
 20 |       |  |               |=D[1]0..n==|                        |  |
 21 |       |  |_______________|           |________________________|  |
 22 |       |__________________________________________________________|
 23 | ```
 24 | Above, a transceiver operates as a clocked (synchronous) component and an
 25 | unclocked (asynchronous) component.  The asynchronous side experiences delay
 26 | controlled by the clock, but uses the asynchronous protocol.
 27 | 
 28 | Marcos Luiggi Lemos Sartori of the Pontifical Catholic University of Rio
 29 | Grande do Sul once [wrote](https://www.inf.pucrs.br/~calazans/publications/2017_MarcosSartori_EoTW.pdf):
 30 | 
 31 | > As far as the Author knows, this is both the first asynchronous RISC-V
 32 | > implementation and the first use of Go as a hardware description language.
 33 | 
 34 | The [ARV implementation](https://github.com/marlls1989/arv) appears to be a
 35 | RISC-V emulator written in Go, although the author notes:
 36 | 
 37 | > A smart asynchronous synthesis tool can extract the intended behaviour
 38 | > from the high level model and implement it in any such template.
 39 | 
 40 | So far as we are aware, the VHDL implementation presented here is the first
 41 | asynchronous RISC-V CPU hardware implementation, and the first using unbroken
 42 | NULL Convention Logic to implement delay-insensitive components.  Unlike
 43 | [previous work by Christensen, Jensen, Jorger, and Sparsø](https://backend.orbit.dtu.dk/ws/portalfiles/portal/4361393/Christensen.pdf), which implemented
 44 | an asynchronous TinyRISC™ TR41401 via delay elements, the RISC-V implementation
 45 | here uses NULL Convention Logic (NCL) and delay-insensitive registers to
 46 | overcome timing issues.
 47 | 
 48 | This CPU requires transceivers at every memory access point, including to
 49 | access any BRAM used as cache, DRAM used as main memory, or internal DSP and
 50 | multiplier facilities.  It provides a full VHDL implementation of all
 51 | facilities except internal cache to facilitate synthesis as an ASIC;
 52 | configurable support for internal use of FPGA facilities is included.
 53 | 
 54 | # Handshake
 55 | 
 56 | A completion-detection handshake allows for delay-insensitive components.  Such components connect as follows:
 57 | ```
 58 |      Sender           Receiver
 59 |  _______________   _______________
 60 | | Ready    (in) |-| Ready   (out) |
 61 | | Waiting (out) |-| Waiting  (in) |
 62 | | d[0..x] (out) |=| d[0..x]  (in) |
 63 | |_______________| |_______________|
 64 | ```
 65 | A strict handshake protocol ensures transitions on each side follow a state
 66 | machine in which data must be acknowledged seen, then not seen; sent, then
 67 | not sent; and so forth.  This protocol ensures each sender holds the data
 68 | lines stable until the recipient acknowledges it has a stable copy of the data,
 69 | and only sends data when a recipient *is* ready to receive data.
 70 | 
 71 | # NULL Convention Logic
 72 | 
 73 | Asynchronous components use a form of one-hot logic called NULL Convention
 74 | Logic.  Each bit has one of the following states:
 75 | 
 76 | ```
 77 | High  Low  Value
 78 |    0    0   NULL
 79 |    1    0      0
 80 |    0    1      1
 81 | ```
 82 | 
 83 | The `[1 1]` signal is invalid.  Completion detection circuits wait for all
 84 | bits to see `High XOR Low = 1` before signaling the completion of some action.
 85 | 
 86 | # Asynchronous Process
 87 | 
 88 | The asynchronous process relies on both the handshake and NCL to function.
 89 | 
 90 | Consider the below:
 91 | ```
 92 |      Sender                   Adder                    Consumer
 93 |  _______________   ______________________________   ______________
 94 | | Ready    (in) |-| Ready   (out)   Ready    (in)|-| Ready  (out) |
 95 | | Waiting (out) |-| Waiting  (in)   Waiting (out)|-| Waiting (in) |
 96 | | d[0..x] (out) |=| d[0..x]  (in)   d[0..x] (out)|=| d[0..x] (in) |
 97 | |_______________| |______________________________| |______________|
 98 | ```
 99 | Above, the **Sender** sends a computation to the **Adder**, which sends the
100 | result to the **Consumer**.
101 | 
102 | Overall, an asynchronous component has the below general block diagram:
103 | 
104 | ![Asynchronous component block diagram](async_component_block_diagram.png)
105 | 
106 | Think of the fancy parallel prefix adder as below:
107 | ```
108 |         [Input]   (in) Waiting,  (Out) Ready
109 |         |  |  |
110 |        [Register]
111 |         |  |  |  * Completion: input
112 |         G  G  G
113 |         | /| /|
114 |         G  G  |
115 |         | /|  |
116 |         G  |  |
117 |       / |  |  | * Completion: output
118 |      [  Output  ] (Out) Waiting, (In) Ready
119 | 
120 | ```
121 | The component needs its data input to remain in place until its data output
122 | is complete and no longer needed by the receiver of this output.  That means
123 | all circuits must complete before this can propagate down.
124 | 
125 | The asynchronous register stores the data in a delay-insensitive manner (see
126 | [the handshake components](../infrastructure/handshake/)), allowing the handshake to immediately
127 | finish while the component processes the data.  The component becomes ready
128 | for new data as soon as the next component has likewise stored the output
129 | and signaled it has done so (by clearing `Ready`).
130 | 
131 | This coordination is necessary to ensure asynchronous components do not get
132 | out of sync and produce bad data.  Clocked circuits assume every component
133 | does its part in one clock cycle, while asynchronous circuits move data as
134 | soon as the sender is ready to send and the receiver is ready to receive.
135 | This can vary with electrical characteristics, temperature, and which
136 | component is in use—parallel adders, slow multipliers, fast incrementers,
137 | all with different amounts of delay, and all operating at full speed rather
138 | than at the speed of the slowest, even when those speeds change.
139 | 


--------------------------------------------------------------------------------
/RISC-V.srcs/asynchronous/cpu/adders/adder.vhdl:
--------------------------------------------------------------------------------
  1 | -- adder components
  2 | --
  3 | -- These are parts of adders
  4 | library IEEE;
  5 | use IEEE.std_logic_1164.all;
  6 | library async_ncl;
  7 | use async_ncl.ncl.all;
  8 | 
  9 | -- Binary adder
 10 | --
 11 | --  Ripple-Carry:
 12 | --
 13 | --        A   B
 14 | --        |   |
 15 | --       -------
 16 | -- Cout-| Adder |-Cin
 17 | --       -------
 18 | --          |
 19 | --          S
 20 | --
 21 | -- Parallel prefix:
 22 | --
 23 | --            A   B
 24 | --            |   |
 25 | --           -------
 26 | -- Cout (G)-| Adder |-Cin (G[n-1])
 27 | --           -------
 28 | --              |
 29 | --              S (P)
 30 | --
 31 | -- Parallel prefix adder sends P to an XOR gate along with Cin
 32 | -- (final output from last stage), so it has the same interface.
 33 | -- In architecture, G would be sent to Cout, P sent to MUX.
 34 | entity binary_adder_ncl_entity is  -- one-bit NCL adder cell; see the ripple-carry / parallel-prefix diagrams above
 35 | port(
 36 |     A     : in  ncl_logic;  -- first addend bit
 37 |     B     : in  ncl_logic;  -- second addend bit
 38 |     Cin   : in  ncl_logic;  -- carry in (G[n-1] in the parallel-prefix arrangement)
 39 |     Cout  : out ncl_logic;  -- carry out (G in the parallel-prefix arrangement)
 40 |     S     : out ncl_logic   -- sum bit (P in the parallel-prefix arrangement)
 41 |     );
 42 | end binary_adder_ncl_entity;
 43 | 
 44 | library IEEE;
 45 | use IEEE.std_logic_1164.all;
 46 | library async_ncl;
 47 | use async_ncl.ncl.all;
 48 | -- There are two forms of this.  All but the last for a given
 49 | -- bit are as follows:
 50 | --
 51 | --  G Gin P Pin
 52 | --  | |   | |
 53 | --  | AND-| |
 54 | --  | |   AND
 55 | --  XOR    |
 56 | --   |     |
 57 | --  Gout  Pout
 58 | --
 59 | -- The last stage is as follows:
 60 | --
 61 | --  G Gin P
 62 | --  | |   |
 63 | --  | AND-
 64 | --  | |
 65 | --  XOR
 66 | --   |
 67 | --  Gout
 68 | entity binary_adder_pg_mux_ncl_entity is  -- propagate/generate combining cell drawn in the diagrams above
 69 | port (
 70 |     P     : in  ncl_logic;  -- this cell's propagate input
 71 |     G     : in  ncl_logic;  -- this cell's generate input
 72 |     Pin   : in  ncl_logic;  -- incoming propagate; the diagram ANDs it with P to form Pout
 73 |     Gin   : in  ncl_logic;  -- incoming generate; the diagram ANDs it with P, then XORs with G to form Gout
 74 |     Pout  : out ncl_logic;  -- outgoing propagate (the last-stage diagram omits Pin/Pout)
 75 |     Gout  : out ncl_logic   -- outgoing generate
 76 |     );
 77 | end binary_adder_pg_mux_ncl_entity;
 78 | 
 79 | -- A simple full adder.
 80 | --
 81 | -- A-------A
 82 | -- |       N-----
 83 | -- | B-----D     |
 84 | -- | |           |
 85 | -- XOR           |
 86 | --   |-------A   |
 87 | --   |       N--OR
 88 | --   | CIN---D   |
 89 | --   | |         |
 90 | --   XOR         |
 91 | --    |          |
 92 | --    S        Cout
 93 | --
 94 | -- All computations require NCL-complete input signals and pass
 95 | -- NULL if any signal is incomplete.  This prevents invalid output.
 96 | architecture binary_adder_ncl_fulladder_arch of binary_adder_ncl_entity is
 97 | begin
 98 |     -- Carry out: both addends set, or exactly one addend set together with a carry in.
 99 |     Cout <= (A AND B) OR ((A XOR B) AND Cin);
100 |     -- Sum: parity of A, B and Cin; the XOR chain is evaluated left to right.
101 |     S    <= (A XOR B) XOR Cin;
102 | end architecture binary_adder_ncl_fulladder_arch;
103 | 


--------------------------------------------------------------------------------
/RISC-V.srcs/asynchronous/cpu/adders/adders.md:
--------------------------------------------------------------------------------
 1 | Adders
 2 | ======
 3 | 
 4 | Various adders are available, using various amounts of space and operating
 5 | at various speeds.
 6 | 
 7 | # Speculative Adders
 8 | 
 9 | Speculative adders take up additional space, but operate at higher frequencies.
10 | They can run at higher fmax in synchronous circuits, and in less time in
11 | asynchronous circuits.
12 | 
13 | Asynchronous adders require additional space, but have enormous advantages in
14 | asynchronous circuits.
15 | 
16 | In synchronous circuits, if the fmax of the adder is higher than the fmax of
17 | the CPU in general, the adder can be clocked higher and latch its output to
18 | provide the addition in one CPU clock cycle instead of two when speculation
19 | produces error.  Speculative adders have an error probability on the order of
20 | 10^-5, so this rarely happens and is not worth the additional space.
21 | 
22 | In a CPU with an asynchronous pipeline, a clocked speculative adder can run at
23 | high speed to the same benefit, with a clock rate independent of the CPU.  An
24 | asynchronous speculative adder can return a result immediately upon completion,
25 | with negligible additional delay when speculation fails.  Synchronous
26 | speculative adders with lower delay but higher error probability can require
27 | several clock cycles to recover; while asynchronous highly-speculative adders
28 | can take advantage of early completion.
29 | 
30 | ## Han-Carlson
31 | 
32 | The Han-Carlson Speculative Adder shortens the critical path by one stage.  It
33 | detects and corrects the result in the rare case of an error.  This adder
34 | consumes minimal area and has a high fmax.
35 | 


--------------------------------------------------------------------------------
/RISC-V.srcs/asynchronous/cpu/adders/shcadder.vhdl:
--------------------------------------------------------------------------------
 1 | -- Speculative Han-Carlson adder
 2 | --
 3 | -- A one-bit full adder looks as below:
 4 | --
 5 | --   S    <= A XOR B XOR Cin
 6 | --   Cout <= (A AND B) OR (B AND Cin) OR (Cin AND A)
 7 | --
 8 | -- A different adder uses three circuits.
 9 | --
10 | -- Adder:
11 | --
12 | --   A     : (in)
13 | --   B     : (in)
14 | --   G     : (out)
15 | --   P     : (out)
16 | --   G     <= A AND B
17 | --   P     <= A XOR B
18 | --
19 | -- Propagate:
20 | --
21 | --   Gin   : (in)
22 | --   Pin   : (in)
23 | --   Cin   : (in)
24 | --   PCin  : (in)
25 | --   Gout  <= (Pin AND Cin) XOR Gin
26 | --   Pout  <= Pin AND PCin
27 | --
28 | -- Sum bit:
29 | --
30 | --   Pin   : (in)
31 | --   Cin   : (in)
32 | --   S     : (out)
33 | --   S     <= Pin XOR Cin
34 | --
35 | -- P from the Adder goes to the Sum bit.  Gout from the Adder goes to
36 | -- Cin on the NEXT Propagator.  The final propagated Gout goes to Cin on
37 | -- the NEXT Sum bit.
38 | --
39 | -- These propagate forward a bunch, creating a complex mess.  Han-Carlson
40 | -- simply shortcuts some of this:
41 | --
42 | -- For every even bit, Gout and Pout from the final Propagate cycle begin
43 | -- forwarding to PCin in the next stage at each power of two.  That is:
44 | -- Bit 0 sends its (G,P) from Input to Stage 1 of bit 1, which sends its
45 | -- (G,P) from Stage 1 to Stage 2 of Bit 3, which sends its (G,P) from 
46 | -- Stage 2 to Stage 3 of Bit 7, and so forth.  In the final stage, the
47 | -- odd bits propagate their (P,G) to the outputs.
48 | --
49 | -- Each bit has to propagate to each other bit.  At Stage 1, Bit 1
50 | -- propagates to Stage 2 of Bit 3; at Stage 2, Bit 1 propagates to Stage
51 | -- 3 of Bit 5.  This is because Stage 3 of Bit 3 propagates to Stage 4 of
52 | -- Bit 7, and so Bit 5 carries no information about Bit 1!  Notably, Bit 2
53 | -- propagates to 3, then 5, but this propagation does not bring any
54 | -- information about Bit 1.  The final stage propagates Bit 1 to Bit 2,
55 | -- which is the first time Bit 2 receives information about Bit 1.
56 | --
57 | -- Speculative Han-Carlson skips the propagation stage before the last.
58 | -- For a 16-bit adder, Bit 7 Stage 3 never propagates to Bit 15 Stage 4;
59 | -- rather it directly propagates to Bit 8 output.
60 | --
61 | -- Just before the output stage, speculative Han-Carlson tests all the
62 | -- odd-numbered bits:
63 | --
64 | --   D     : (in) [15 downto 0]
65 | --   Error : (out)
66 | --   Error <= ((D[1] AND D[9]) XOR (D[3] AND D[11])) XOR
67 | --            ((D[5] AND D[13]) XOR (D[7] AND D[15]))
68 | --
69 | -- When an error is detected, the last stage is computed.  Errors are
70 | -- fairly infrequent, so the fast path usually occurs.  The adder also
71 | -- reduces the amount of space needed.
72 | --
73 | -- Each component can also use a two-way state signal rather than the
74 | -- adder running on a clock.  This essentially propagates a "done"
75 | -- signal.  Such an adder can begin computing new addition before prior
76 | -- signals have fully propagated and effectively pipeline additions.
77 | 
78 | 


--------------------------------------------------------------------------------
/RISC-V.srcs/asynchronous/cpu/async_component_block_diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrmoserbaltimore/risc-v-cpu-asynchronous/3c0864c1b023da5a7b3475c484f0aca1b9310e09/RISC-V.srcs/asynchronous/cpu/async_component_block_diagram.png


--------------------------------------------------------------------------------
/RISC-V.srcs/asynchronous/cpu/pipelines/README.md:
--------------------------------------------------------------------------------
1 | Pipelines
2 | =========
3 | 
4 | These pipelines provide various facilities, such as a simple pipeline;
5 | out-of-order execution; or speculative execution.
6 | 


--------------------------------------------------------------------------------
/RISC-V.srcs/asynchronous/cpu/pipelines/ooe-pipeline.md:
--------------------------------------------------------------------------------
 1 | Simple Out-of-Order Execution Pipeline
 2 | ======================================
 3 | 
 4 | This pipeline extends the Simple Pipeline to include out-of-order execution.
 5 | 
 6 | # Pipeline staging
 7 | ```
 8 | [Fetch]
 9 |        [LRW]
10 |             [Load]
11 |                   [ULR]
12 |                        [Decode]
13 |                                [Execute]
14 |                                         [Store]
15 |                                                [Retire]
16 | ```
17 | In this pipeline, each instruction takes both read and write locks.  As in
18 | the simple pipeline, locks are taken before Load; however, both read and
19 | write lock counts are tracked.
20 | 
21 | Speculative execution and branch prediction are unsupported by this pipeline.
22 | As the `Fetch` stage must use and update `pc`, `Fetch` sends the current `pc`
23 | with the fetched instruction.  The `Fetch` stage occurs in order.
24 | 
25 | The `LRW` stage takes Read and Write locks.  `LRW` stalls any instructions
26 | reading or writing data under write lock; the stalled instruction is placed
27 | into a buffer, and the next instruction goes into `LRW`.  The next instruction
28 | stalls both by normal locks and by having a locking contention with the
29 | buffered instruction.  If the next instruction stalls, `LRW` stalls entirely
30 | until the buffer is free; otherwise the instruction continues as normal.
31 | 
32 | This process allows simple out-of-order instruction execution for most RV32I
33 | and RV64I instructions.  RISC-V instructions generally don't have
34 | side-effects, such as setting status flag registers, so their order is
35 | generally unimportant.
36 | 
37 | 


--------------------------------------------------------------------------------
/RISC-V.srcs/asynchronous/cpu/pipelines/simple-pipeline.md:
--------------------------------------------------------------------------------
 1 | Pipeline design
 2 | ===============
 3 | 
 4 | The pipeline takes a minimalist approach.
 5 | 
 6 | 
 7 | ```
 8 | [Fetch]
 9 |        [Decode]
10 |                [Semaphore]
11 |                           [Load]
12 |                                 [Execute]
13 |                                          [Store]
14 |                                                 [Retire]
15 | ```
16 | The Semaphore is an atomic locking operation to keep computations in-order,
17 | activated before Load and on Retire.  In pseudocode:
18 | 
19 | ```
20 | RWSemaphore(Resource, Read, Write)
21 |   if Read
22 |     ReadLock(Resource)
23 |   if Write
24 |     WriteLock(Resource)
25 | 
26 | WriteLock(Resource)
27 |   NoWaitWriteLock(Resource)
28 | 
29 | ReadLock(Resource)
30 |   WaitForWriteUnlock(Resource)
31 | ```
32 | No locking occurs before the first semaphore stage.  Write locks do not block
33 | when taken because all read locks from earlier instructions will be closed out
34 | before the current instruction reaches the Store stage.
35 | 
36 | Read locks block when a write lock is held on the resource.  This prevents the
37 | Load until the write lock is released.  Write locks increment and decrement
38 | for this reason:  multiple writes to a resource *without* reads will pipeline
39 | multiple non-blocked write locks.  All writes must complete before a further
40 | read can occur.  Only out-of-order execution environments need to track read
41 | locks.
42 | 
43 | This behavior also means taking a read lock first avoids blocking on the
44 | instruction's own write lock, avoiding read-and-write lock logic.
45 | 
46 | In the Retire stage, the write lock is atomically decremented.  When the write
47 | lock hits zero, any instruction blocked at Semaphore waiting for that resource
48 | continues its execution.
49 | 
50 | 


--------------------------------------------------------------------------------
/RISC-V.srcs/asynchronous/cpu/pipelines/simple-pipeline.vhdl:
--------------------------------------------------------------------------------
 1 | -- pipeline
 2 | --
 3 | -- A pipeline component carries out a particular stage in a pipeline.
 4 | -- The component provides stage-to-stage synchronization.
 5 | --
 6 | -- In this way, the pipeline is clockless.  The pipeline may rely on external
 7 | -- clocked components and so may in practice wait for those.
 8 | --
 9 | -- Note that the pipelined component must handle all interdependencies.  If
10 | -- an operation relies on a prior operation completing, it must coordinate
11 | -- with the further stages of the pipeline.  For example:  if an instruction
12 | -- decodes with a write, the write must be notated in some kind of semaphore
13 | -- system, and reads and writes must stall.
14 | --
15 | 


--------------------------------------------------------------------------------
/RISC-V.srcs/asynchronous/cpu/roadmap.md:
--------------------------------------------------------------------------------
  1 | Roadmap for CPU
  2 | ===============
  3 | 
  4 | # Minimal CPU
  5 | 
  6 | The steps to create a working RISC-V RV32I implementation are simple:
  7 | 
  8 | 1.  Asynchronous component infrastructure
  9 |  * Handshake
 10 |  * Asynchronous register
 11 |  * Sync-Async transceiver
 12 | 2. Infrastructure
 13 |  * Register file
 14 |  * Instruction fetch
 15 |  * Instruction decoder
 16 | 3. Asynchronous RAM bus interface
 17 |  * Interface with synchronous memory
 18 | 4.  Asynchronous components
 19 |  * Adder
 20 |  * Incrementer (toggle bits until encountering the first 0 bit)
 21 |  * 2's complement
 22 | 5.  Basic asynchronous instruction implementations
 23 |  * Canonical NOP instruction
 24 |    * Detect `ADDI x0, x0, 0` and silently abort the insn
 25 |    * Other writes to `x0` are `HINT` insns
 26 |  * Execute-circuit implementations
 27 |    * Load (`LW`, `LUI`)
 28 |    * Sign- and zero-extending Load (`LB`, `LH`, `LBU`, `LHU`)
 29 |    * Store (`SW`, `SH`, `SB`)
 30 |    * Bitwise logic (`AND`, `OR`, `XOR`, `ANDI`, `ORI`, `XORI`)
 31 |    * Bit shifters (`SLLI`, `SRLI`, `SRAI`, `SLL`, `SRL`, `SRA`)
 32 |    * Arithmetic (`ADD`, `SUB`, `ADDI`)
 33 |    * Branch (`BEQ`, `BNE`, `BLT`, `BGE`, `BLTU`, `BGEU`)
 34 |    * Control flow (`AUIPC`, `JAL`, `JALR`)
 35 |    * Comparison instructions (`SLTI`, `SLTIU`, `SLT`, `SLTU`)
 36 | 6.  Asynchronous ALU
 37 | 7.  Asynchronous pipeline
 38 |  * Fetch
 39 |  * Decode
 40 |  * Locking
 41 |  * Load
 42 |  * Execute
 43 |  * Retire
 44 | 
 45 | The above implements all the RV32I instructions except `FENCE`.  This
 46 | does not, however, implement machine mode:  the CPU is not a proper
 47 | RISC-V CPU.  With the above implemented, test RISC-V code can run on
 48 | the core.
 49 | 
 50 | # Machine-Mode
 51 | 
 52 | To implement a machine-mode RISC-V CPU, we need more infrastructure:
 53 | 
 54 | 1.  Machine-mode CSRs
 55 |  * `misa`
 56 |  * `mvendorid`
 57 |  * `marchid`
 58 |  * `mimpid`
 59 |  * `mhartid`
 60 |  * `mstatus`
 61 |  * `mstatush`
 62 |  * `medeleg`
 63 |  * `mideleg`
 64 |  * `mip`
 65 |  * `mie`
 66 |  * `mtime`
 67 |  * `mtimecmp`
 68 |  * `mcycle`
 69 |  * `minstret`
 70 |  * `mcounteren`
 71 |  * `mcountinhibit`
 72 |  * `mscratch`
 73 |  * `mepc`
 74 |  * `mcause`
 75 |  * `mtval`
 76 | 2.  Machine-level ISA
 77 |  * Environment call (`ECALL`, `EBREAK`)
 78 |  * Trap-return (`MRET`, `SRET`)
 79 |  * Wait for interrupt (`WFI`)
 80 | 3.  Machine-level infrastructure
 81 |  * Reset state
 82 |  * NMI
 83 |  * Physical memory considerations
 84 |  * Memory protection
 85 |  * Paging
 86 | 4.  `FENCE` instruction to complete RV32I
 87 | 
 88 | Machine mode is not overly complex, but does carry a lot of infrastructure.
 89 | 
 90 | # Supervisor mode
 91 | 
 92 | Supervisor mode extends a CPU with machine mode, providing all the facilities
 93 | to run a modern Linux operating system.
 94 | 
 95 | # RV32M extension
 96 | 
 97 | Multiply and Divide add additional instructions and multipliers.
 98 | 
 99 | 1.  Infrastructure
100 |  * Multipliers
101 |  * Dividers (Paravartya using multipliers)
102 | 2.  Instructions
103 |  * Multiplication (`MUL`, `MULH`, `MULHSU`, `MULHU`)
104 |  * Division (`DIV`, `DIVU`, `REM`, `REMU`)
105 | 
106 | # RV64IM
107 | 
108 | RV64I extends the addressing space and register size in 64-bit mode, and adds
109 | a few 64-bit instructions.
110 | 
111 | 1.  Infrastructure
112 |  * 64-bit flag and proper behavior
113 |  * Decoder
114 | 2.  Instructions
115 |  * 64-bit load/store (`LD`, `SD`)
116 |  * 32-bit W instructions
117 |  * Adjustments to base instructions for 64-bit operation
118 | 
119 | Implementation of RV64M on top of all the above provides a full 64-bit
120 | asynchronous RISC-V processor, albeit without floating point.
121 | 
122 | # Hypervisor Mode
123 | 
124 | The hypervisor extension is in draft as of RISC-V privileged architectures
125 | V1.12 draft.
126 | 
127 | Hypervisor extensions add a significant amount of infrastructure and
128 | instructions to the CPU and are far more challenging to implement than
129 | Supervisor-mode extensions.
130 | 


--------------------------------------------------------------------------------
/RISC-V.srcs/asynchronous/cpu/shifters/barrel_shifter.vhdl:
--------------------------------------------------------------------------------
  1 | -- vim: ts=4 sw=4 et
  2 | -- Barrel shifter
  3 | --
  4 | -- n-bit NCL barrel shifter with arithmetic right-shift
  5 | library IEEE;
  6 | use IEEE.std_logic_1164.all;
  7 | use IEEE.math_real."ceil";
  8 | use ieee.math_real."log2";
  9 | library async_ncl;
 10 | use async_ncl.ncl.all;
 11 | 
 12 | -- NCL 2:1 mux
 13 | --
 14 | -- Spurious outputs (glitch) happen if you use a non-NCL bit for
 15 | -- the shifter, so yes the MUX selector input has to be NCL.
 16 | --
 17 | --    Bit 1    Bit 0
 18 | --    |   |    |   |
 19 | --  ------------------
 20 | --  \                /-- NCL
 21 | --   \     OUT      /---Select
 22 | --     ------------
 23 | --         | |
 24 | --
 25 | 
 26 | 
 27 | -- Barrel shifter
 28 | --
 29 | -- Select bit of '1' selects the left (input) bit.
 30 | --
 31 | -- bit  4   3   2   1
 32 | --      |  -|  -|  -|-   --Arithmetic Shift
 33 | --      | | | | | | | | |
 34 | --      | | | | | | | AND
 35 | --      | | | | | | |  |
 36 | --      | | | | | | |  Sx   Sign-extend bit
 37 | --      | | | | | | |
 38 | --      | | | | | | | Sx
 39 | --      | | | | | | | |
 40 | --      MUX MUX MUX MUX--Select bit 0 (To all stage-1 MUX)
 41 | --      |  -|---+   |
 42 | --      | | |  -|---+
 43 | --      | | | | |  -|-+---Sx
 44 | --      | | | | | | | |
 45 | --      MUX MUX MUX MUX--Select bit 1 (To all stage-2 MUX)
 46 | --      |   |   |   |
 47 | --      |  -|-+-|-+-|-+---Sx
 48 | --      | | | | | | | |
 49 | --      MUX MUX MUX MUX--Select bit 2 (To all stage-3 MUX)
 50 | --       |   |   |   |
 51 | --
 52 | -- In theory, it's faster to take the first stage if the shift is
 53 | -- all off or all on, but that's more tests and gates.
 54 | --
 55 | -- Barrel shifter r2 only has to be log(xlen), e.g 5 for 32-bit,
 56 | -- 6 for 64-bit, 7 for 128-bit.
 57 | entity e_barrel_shifter_ncl is  -- interface of the n-bit NCL barrel shifter
 58 | -- XLEN is the data width in bits.  Only feed this a power of 2!
 59 |     generic ( XLEN      : positive;
 60 |               BitWidths : positive);  -- number of bits in the BitWidth selector port
 61 |     port(
 62 |         Din        : in  ncl_logic_vector(XLEN-1 downto 0);  -- value to shift
 63 |         Shift   : in  ncl_logic_vector(integer(ceil(log2(real(XLEN))))-1 downto 0);  -- shift amount, ceil(log2(XLEN)) bits
 64 |         ShRight    : in  ncl_logic;  -- '1' shifts right, '0' shifts left
 65 |         Arithmetic : in  ncl_logic;  -- together with ShRight, enables sign fill from the selected MSB
 66 |         BitWidth   : in  ncl_logic_vector(BitWidths-1 downto 0);  -- operating width: MSB set = full, MSB-1 = half, and so on
 67 |         Dout       : out ncl_logic_vector(XLEN-1 downto 0)  -- shifted result
 68 |     );
 69 | end e_barrel_shifter_ncl;
 70 | 
 71 | -- Mux-tree barrel shifter.  All computations require NCL-complete
 72 | -- input signals and pass NULL if any signal is incomplete, which
 73 | -- prevents invalid output.
 74 | --
 75 | -- Right shifts reuse the left-shift tree: the input is bit-reversed,
 76 | -- shifted left, and the result is bit-reversed again on output.
 77 | architecture barrel_shifter_ncl of e_barrel_shifter_ncl is
 78 |     -- One row per shift stage, plus a "top" row at Shift'LOW-1 holding the (possibly reversed) input.
 79 |     type tree_array is array (Shift'HIGH downto SHIFT'LOW-1) of ncl_logic_vector(XLEN-1 downto 0);
 80 |     signal tree : tree_array := (others => (others => ('0', '0')));
 81 |     signal SignEx : ncl_logic;
 82 | begin
 83 |     
 84 |     -- This thing is actually inherently combinatorial
 85 |     barrel: process(all) is
 86 |     variable BWNumeric : integer := 0;
 87 |     variable MSBidx    : integer := XLEN-1;
 88 |     begin
 89 |         -- Find the bit divisor
 90 |         -- If the MSB in BitWidth is set, then use full width.
 91 |         -- If MSB-1 is set, half width.
 92 |         -- If MSB-2, quarter width.
 93 |         -- So on.
 94 |         --
 95 |         -- BWNumeric becomes 0, 1, and 2, respectively, for the
 96 |         -- three cases above; 2**0 = 1, 2**1 = 2, 2**2 = 4 then
 97 |         -- gives the divisor applied to the full width.
 98 |         for i in BitWidth'HIGH downto BitWidth'LOW loop
 99 |             if (BitWidth(i) = '1') then
100 |                 BWNumeric := BitWidth'HIGH - i;
101 |                 exit;
102 |             end if;
103 |         end loop;
104 |         
105 |         -- Figure out the index of the most significant bit
106 |         --
107 |         -- e.g. RV128I and we're doing a 32-bit shift:
108 |         --   BitWidth  = "001"
109 |         --   BWNumeric = 2 - 0 = 2
110 |         --   Din(((127+1) / (2**2)) - 1)
111 |         --     = Din((128 / 4) - 1)
112 |         --     = Din(31)   -- i.e. (31 downto 0)
113 |         MSBidx := ((Din'HIGH+1) / (2**BWNumeric)) - 1;
114 |  
115 |         --  SignBit Arithmetic
116 |         --        | |
117 |         --        AND ShRight
118 |         --          | |
119 |         --          AND
120 |         --           |
121 |         --          All shifted-out MUXes
122 |         --
123 |         -- The fill bit is the selected MSB, gated by Arithmetic
124 |         -- and ShRight, so it also folds in the ShRight check.
125 |         SignEx          <= Din(MSBidx) AND Arithmetic AND ShRight;
126 | 
127 |         if (ncl_is_null(BitWidth) OR ncl_is_null(SignEx)) then
128 |              -- Without this check we might use a BWNumeric derived
129 |              -- from an incomplete BitWidth and get bad results; the
130 |              -- same goes for Arithmetic and ShRight, which feed SignEx.
131 |              --
132 |              -- Until the inputs are complete, null the top row of the
133 |              -- tree: nothing further down checks BitWidth for NULL.
134 |              -- NOTE(review): Dout and the lower rows are not driven in
135 |              -- this branch -- confirm whether Dout should also go NULL.
136 |              tree(Shift'LOW-1) <= (others => ('0','0'));
137 |         elsif (Shift(Shift'HIGH - BWNumeric) = '1') then
138 |             -- If the top shift bit for the selected width is high,
139 |             -- treat everything as shifted out and set all output
140 |             -- bits to zero.
141 |             --
142 |             -- Fun fact: no matter what the data input, this is the
143 |             -- result; so it's actually reasonable to drop the
144 |             -- Ready signal and tell the component sending the
145 |             -- shift that you've received the data as soon as
146 |             -- Shift() has that bit on.
147 |             --
148 |             -- For narrower BitWidth the tested bit moves down, e.g.
149 |             -- 1/4 width gives BWNumeric = 2, so the bit two below
150 |             -- Shift'HIGH is checked instead.
151 |             -- NOTE(review): Shift is only log2(XLEN) bits wide, so
152 |             -- this bit looks like a shift of half the width rather
153 |             -- than an overflow bit, and sign fill is skipped -- confirm.
154 |             -- THIS IS A 0 OUTPUT, NOT A NULL OUTPUT.
155 |             Dout <= (others => ncl_encode('0'));
156 |         else
157 |             if (ShRight = '0') then
158 |                 -- Put Din into the top of the tree to avoid breaking out special
159 |                 -- handling for the first row.  The "top" is row Shift'LOW-1.
160 |                 tree(Shift'LOW-1) <= Din;
161 |             elsif (ShRight = '1') then
162 |                 -- Put it in backwards.  This should just be a row of muxes.
163 |                 for j in Din'RANGE loop
164 |                     
165 |                     -- Din(j) lands in column Din'HIGH - j of the top row,
166 |                     -- e.g. Din(0) goes to column 127 on a 128-bit datapath.
167 |                     -- Bits above MSBidx (unused when running narrower
168 |                     -- instructions or modes on a wider platform) are
169 |                     -- replaced by SignEx.
170 |                     -- Accordingly, we want the most significant bit down.
171 |                     tree(Shift'LOW-1)(Din'HIGH - j) <=      SignEx WHEN j > MSBidx
172 |                                                        ELSE Din(j);
173 |                 end loop;
174 |             end if;
175 | 
176 |             -- It's going to compute them all in parallel;
177 |             -- combinatorial logic is not any faster by using
178 |             -- j in MSBidx downto 0
179 |             for i in Shift'HIGH - BWNumeric downto Shift'LOW loop
180 |                 for j in Din'RANGE loop
181 |                     if (j < 2**i) then
182 |                         -- Fill columns: shifting by 2**i leaves columns
183 |                         -- 0 .. 2**i-1 without a source, so they take SignEx.
184 |                         -- The bound is strict: column 2**i is fed by column 0.
185 |                         tree(i)(j) <=    (tree(i-1)(j) AND NOT Shift(i))
186 |                                             OR (SignEx AND Shift(i));
187 |                     else
188 |                         -- This part will NOT check Arithmetic or
189 |                         -- ShRight, which can lead to spurious outputs in
190 |                         -- contrived situations given valid input and handshake,
191 |                         -- hence the explicit SignEx NULL check above.
192 |                         --
193 |                         -- If shift bit not on, take this column;
194 |                         -- if shift on, take the column 2**i to the right
195 |                         tree(i)(j) <=    (tree(i-1)(j) AND NOT Shift(i))
196 |                                             OR (tree(i-1)(j-2**i) AND Shift(i));
197 |                     end if;
198 |                 end loop;
199 |             end loop;
200 |             if (ShRight = '0') then
201 |                 -- Shift left doesn't care about the rest of the register
202 |                 Dout <= tree(Shift'HIGH - BWNumeric);
203 |             else
204 |                 -- We have to reverse the lowest bits below MSBidx.
205 |                 for j in MSBidx downto 0 loop
206 |                     Dout(MSBidx - j) <= tree(Shift'HIGH - BWNumeric)(j);
207 |                 end loop;
208 |             end if;
209 |         end if;
210 |     end process barrel;
211 | end barrel_shifter_ncl;
212 | 


--------------------------------------------------------------------------------
/RISC-V.srcs/asynchronous/cpu/shifters/barrel_shifter_no_signex.vhdl:
--------------------------------------------------------------------------------
  1 | -- vim: ts=4 sw=4 et
  2 | -- Barrel shifter
  3 | --
  4 | -- n-bit NCL barrel shifter.  Cannot do arithmetic (sign extension)
  5 | library IEEE;
  6 | use IEEE.std_logic_1164.all;
  7 | use IEEE.math_real."ceil";
  8 | use ieee.math_real."log2";
  9 | library async_ncl;
 10 | use async_ncl.ncl.all;
 11 | -- NCL 2:1 mux
 12 | --
 13 | -- Spurious outputs (glitch) happen if you use a non-NCL bit for
 14 | -- the shifter, so yes the MUX selector input has to be NCL.
 15 | --
 16 | --    Bit 1    Bit 0
 17 | --    |   |    |   |
 18 | --  ------------------
 19 | --  \                /-- NCL
 20 | --   \     OUT      /---Select
 21 | --     ------------
 22 | --         | |
 23 | --
 24 | 
 25 | 
 26 | -- Barrel shifter
 27 | --
 28 | -- Select bit of '1' selects the left (input) bit.
 29 | --
 30 | -- bit  4   3   2   1
 31 | --      |  -|  -|  -|
 32 | --      | | | | | | |
 33 | --      MUX MUX MUX AND--NOT-- Select bit 0
 34 | --      |  -|---|   |        |
 35 | --      | | |  -|---+         - To all MUX on first stage
 36 | --      | | | | |   | 
 37 | --      | | | | |   | 
 38 | --      MUX MUX AND AND--NOT-- Select bit 1
 39 | --      |   |   |   |        |
 40 | --      |   |   |   |         - to all MUX on this stage
 41 | --      |   |   |   |
 42 | --      AND AND AND AND--NOT-- Select bit 2 (to all AND on this stage)
 43 | --       |   |   |   |
 44 | --
 45 | -- In theory, it's faster to take the first stage if the shift is
 46 | -- all off or all on, but that's more tests and gates.
 47 | --
 48 | -- Barrel shifter r2 only has to be log(xlen), e.g. 5 for 32-bit,
 49 | -- 6 for 64-bit, 7 for 128-bit.
 50 | entity e_barrel_shifter_ncl is
 51 |     -- n is the data width in bits.  Only feed this a power of 2!
 52 |     generic (n : positive);
 53 |     port (
 54 |         Din     : in  ncl_logic_vector(n - 1 downto 0);  -- value to shift
 55 |         Shift   : in  ncl_logic_vector(integer(ceil(log2(real(n)))) - 1 downto 0);
 56 |         ShRight : in  ncl_logic;  -- tested against '0' (left) and '1' (right)
 57 |         Dout    : out ncl_logic_vector(n - 1 downto 0)   -- shifted result
 58 |     );
 59 | end entity e_barrel_shifter_ncl;
 60 | 
 61 | -- All computations require NCL-complete input signals and pass
 62 | -- NULL if any signal is incomplete.  This prevents invalid output.
 63 | --
 64 | -- This barrel shifter is reversible by using n muxes on input and
 65 | -- output to reverse the bit order (reverse input, shift left,
 66 | -- reverse output).
 67 | architecture a_barrel_shifter_ncl of e_barrel_shifter_ncl is
 68 |     type tree_array is array (Shift'RANGE) of ncl_logic_vector(n-1 downto 0);
 69 |     -- tree(i) holds the output row of stage i.
 70 |     signal tree : tree_array := (others => (others => ('0', '0')));
 71 | begin
 72 | 
 73 |     -- Logarithmic shifter: one row of 2:1 muxes per Shift bit.  Stage i
 74 |     -- passes its input row through when Shift(i) is 0 and moves it 2**i
 75 |     -- columns towards the MSB when Shift(i) is 1.
 76 |     --
 77 |     -- Right shifts reuse the same left-shifting network: Din enters the
 78 |     -- first stage bit-reversed and the last row is bit-reversed again on
 79 |     -- its way to Dout.
 80 |     --
 81 |     -- Every Shift bit is applied as an ordinary stage.  Shift is
 82 |     -- ceil(log2(n)) bits wide, so its top bit moves data by 2**Shift'HIGH
 83 |     -- columns, which is less than n for the power-of-2 widths this entity
 84 |     -- is meant for; a check on Shift'HIGH (an index, not a bit value) must
 85 |     -- not be used to blank the output.
 86 |     --
 87 |     -- This thing is actually inherently combinatorial
 88 |     barrel: process(all) is
 89 |         -- Input row of the stage currently being described
 90 |         variable feed : ncl_logic_vector(n-1 downto 0);
 91 |     begin
 92 |         for i in Shift'LOW to Shift'HIGH loop
 93 |             -- Select what feeds this stage
 94 |             if (i > Shift'LOW) then
 95 |                 -- Later stages read the row above
 96 |                 feed := tree(i-1);
 97 |             elsif (ShRight = '0') then
 98 |                 -- Shift left: first row straight from Din
 99 |                 feed := Din;
100 |             elsif (ShRight = '1') then
101 |                 -- Shift right: first row from Din, reversed
102 |                 for j in Din'RANGE loop
103 |                     feed(j) := Din(Din'HIGH - j);
104 |                 end loop;
105 |             else
106 |                 -- ShRight is not a complete NCL value yet: feed NULL rather
107 |                 -- than indexing a row below the first one.
108 |                 feed := (others => ('0', '0'));
109 |             end if;
110 |             for j in Din'RANGE loop
111 |                 if (j < 2**i) then
112 |                     -- No column 2**i to the right exists, so an AND gate
113 |                     -- replaces the mux.
114 |                     tree(i)(j) <= feed(j) AND NOT Shift(i);
115 |                 else
116 |                     -- If shift bit not on, take this column;
117 |                     -- if shift bit on, take the column 2**i to the right.
118 |                     -- (j = 2**i belongs here: it takes column 0.)
119 |                     tree(i)(j) <=    (feed(j)      AND NOT Shift(i))
120 |                                   OR (feed(j-2**i) AND Shift(i));
121 |                 end if;
122 |             end loop;
123 |         end loop;
124 | 
125 |         -- Final row drives Dout; reverse back to normal if shifting right.
126 |         if (ShRight = '0') then
127 |             Dout <= tree(Shift'HIGH);
128 |         elsif (ShRight = '1') then
129 |             for j in Din'RANGE loop
130 |                 Dout(j) <= tree(Shift'HIGH)(Din'HIGH - j);
131 |             end loop;
132 |         else
133 |             Dout <= (others => ('0', '0'));
134 |         end if;
135 |     end process barrel;
136 | end a_barrel_shifter_ncl;
137 | 


--------------------------------------------------------------------------------
/RISC-V.srcs/asynchronous/infrastructure/handshake/README.md:
--------------------------------------------------------------------------------
  1 | Asynchronous Handshake
  2 | ======================
  3 | 
  4 | A completion-detection handshake allows for delay-insensitive components.
  5 | 
  6 | Consider a component shaped as below:
  7 | ```
  8 |   ______________________________
  9 | -| Ready  (out)   Ready    (in) |-
 10 | -| Waiting (in)   Waiting (out) |-
 11 | =| d[0..x] (in)   d[0..x] (out) |=
 12 |  |______________________________|
 13 | ```
 14 | The above has a data input and a data output.  Another component with the same
 15 | interface would send and receive data over the same interface.
 16 | 
 17 | This allows only the transitions below:
 18 | ```
 19 | Receiver:
 20 | W[in]   R[out]   Transition
 21 | 1       1        R[out] <= 0
 22 | 0       0        R[out] <= 1
 23 | 
 24 | Sender:
 25 | R[in]   W[out]   Transition
 26 | 1       0        W[out] <= 1
 27 | 0       1        W[out] <= 0
 28 | ```
 29 | 
 30 | The initialization state is `W[out]=0, R[out]=X`, with `R[out]` transitioning
 31 | to `1` when ready.  To be more clear:
 32 | ```
 33 |      Sender           Receiver
 34 |  _______________   _______________
 35 | | Ready    (in) |-| Ready   (out) |
 36 | | Waiting (out) |-| Waiting  (in) |
 37 | | d[0..x] (out) |=| d[0..x]  (in) |
 38 | |_______________| |_______________|
 39 | ```
 40 | This proceeds as follows:
 41 | ```
 42 | Sender:   W = 0        State: R=0, W=0
 43 | Receiver: R = 0        State: R=0, W=0
 44 | Sender becomes ready to send data (valid here...)
 45 | Sender:   d <= [data]
 46 | Receiver becomes ready to accept new data
 47 | Receiver: R <= 1       State: R=1, W=0
 48 | (...or sender can become ready to send here)
 49 | Sender:   W <= 1       State: R=1, W=1
 50 | Receiver stores the data and no longer needs d
 51 | Receiver: R <= 0       State: R=0, W=1
 52 | Sender acknowledges (any state on Sender's end)
 53 | Sender:   W <= 0       State: R=0, W=0
 54 | ```
 55 | 
 56 | The Receiver must buffer the data or the entire asynchronous CPU will
 57 | simply wait until each single instruction is 100% complete and the
 58 | output sent to main RAM before fetching the next single instruction.
 59 | 
 60 | To do this, we use a delay-insensitive flip flop as a one-bit NCL
 61 | register:
 62 | 
 63 | ![Delay-Insensitive Flip Flop Buffer](ncl_async_register.png)
 64 | 
 65 | This register outputs `STORED` or `St` when `D` is non-NULL, `Q = D`,
 66 | and `W` is `1` (i.e. sender is signaling the data on `D` is valid).
 67 | In this way, it is insensitive to its own delay and the delay of the
 68 | sending circuit.
 69 | 
 70 | Assembling these into an n-bit register produces the below black box:
 71 | ```
 72 |   __________
 73 | =| D     EN |-
 74 | -| W    CLR |-
 75 |  |       St |-
 76 |  |        Q |=
 77 |  |__________|
 78 | ```
 79 | The receiver checks `Q` (for valid non-NULL data) and `St` (for the
 80 | register indicating that it itself considers the data non-NULL and
 81 | stored) before transitioning `R` from `1` to `0`.
 82 | 
 83 | For proper operation, the receiver should not transition `R` from `0`
 84 | to `1`  until first setting `CLR`, then validating the circuit's final
 85 | output is NULL.  This ensures the circuit has been flushed and won't
 86 | generate spurious non-NULL output when new data comes in.
 87 | 
 88 | Once the receiver has validated its output is flushed, it transitions
 89 | `CLR` to `0` and `R` to `1`.  The `EN` signal is suppressed until `CLR`
 90 | is `0` so as to avoid an `EN AND CLR` situation (although the DFF
 91 | respects `CLR` over `EN` in its current implementation).
 92 | 
 93 | Importantly, `D` must remain valid until `R` transitions to `0`.  The
 94 | sending circuit can output `D` continuously when `R` is `0`, and so
 95 | can send its output directly to `D` with no buffer.
 96 | 
 97 | The sender should transition its output to NULL and then to valid;
 98 | however, the NCL check fails on both `00` and `11`, so even if the
 99 | circuit glitches and stores `Q = D = 11`, it will not proceed.  This
100 | suppresses the data hazard.  Consider transitioning from `10` to
101 | `01`:
102 | ```
103 | 10 => 00 => 01
104 | 10 => 11 => 01
105 | ```
106 | The supposed flush moves to `00`, indicating NULL; but a glitch where
107 | the `0` to `1` transition arrives before the corresponding `1` to `0`
108 | transition moves to `11`, which is treated as NULL.  The circuit
109 | behaves identically in either case.  The flush is to ensure a
110 | fully-valid but *incorrect* value does not land on `D` while `W` is
111 | `1`, which would result in the receiver accepting the data as
112 | correct and processing it—a severe data hazard.
113 | 
114 | Altogether, the handshake protocol and the delay-insensitive NCL
115 | registers provide for asynchronous data transfer between internal
116 | components.
117 | 
118 | # Example:  Logical NOT
119 | 
120 | The below circuit implements the receiver and sender handshake, a
121 | one-bit delay-insensitive register, and a logical NOT (which
122 | requires no gates itself:  the signals are connected to inverse
123 | output).
124 | 
125 | ![Delay-Insensitive Logical NOT](ncl_async_logical_not.png)
126 | 
127 | The NCL completion check occurs several times in larger circuits:
128 | ```
129 |  -AH AL-
130 | |  | |  |
131 | |  XOR  |
132 | |  _|_  |
133 | | |   | |
134 | AND   AND
135 |   |   |
136 |  AH   AL
137 | ```
138 | This check uses one XOR gate and two AND gates per one NCL bit
139 | and is effectively a specialized four-to-two mux selecting
140 | between `[AH, AL]` when `AH XOR AL = 1` and `[0 0]` when
141 | `AH XOR AL = 0`.
142 | 
143 | This component can wrap around other components:
144 | ```
145 |   AH AL-   -BL BH
146 |    | |  | |  | |
147 |    XOR  | |  XOR
148 |     |   | |   |
149 |     |   AND   |
150 |     |   | |   |
151 |     | NOT |   |
152 |     |   | |   |
153 |      -AND AND-
154 |         | |
155 |        OH OL
156 | ```
157 | The above performs a logical `AND` of two bits `A` and `B`,
158 | outputting `NULL` when either input is `NULL`.  The same
159 | circuit will be needed around the next component in the
160 | event the `XOR` signals propagate before the `AND`, causing
161 | the `NOT` to output a signal when the `AND` gate is
162 | receiving invalid input.  This is unlikely, but
163 | mathematically possible in the event the signal propagates
164 | through the central `AND` gate later than it does from the
165 | peripheral `XOR` gates.
166 | 


--------------------------------------------------------------------------------
/RISC-V.srcs/asynchronous/infrastructure/handshake/handshake.vhdl:
--------------------------------------------------------------------------------
 1 | -- vim: sw=4 ts=4 et
 2 | -- Handshake
 3 | --
 4 | --      Sender           Receiver
 5 | --  _______________   _______________
 6 | -- | Ready    (in) |-| Ready   (out) |
 7 | -- | Waiting (out) |-| Waiting  (in) |
 8 | -- | d[0..x] (out) |=| d[0..x]  (in) |
 9 | -- |_______________| |_______________|
10 | --
11 | -- Transitions:
12 | -- W[in]   R[out]   Transition
13 | -- 1       1        R[out] <= 0
14 | -- 0       0        R[out] <= 1
15 | --
16 | -- R[in]   W[out]   Transition
17 | -- 1       0        W[out] <= 1
18 | -- 0       1        W[out] <= 0
19 | --
20 | -- Ready and Waiting are TTL, not NCL:  there is only
21 | -- one valid transition each way on the En input to the
22 | -- handshakes (0 to 1 or 1 to 0).  State cannot go from
23 | -- enabled to null to enabled, because the circuit
24 | -- using the handshake MUST be able to identify with
25 | -- 100% certainty that it's transitioned TO a ready
26 | -- or waiting state and with 100% certainty that it's
27 | -- transitioned TO a not-ready or not-waiting state.
28 | -- This is why the asynchronous circuits using this
29 | -- handshake MUST verify complete data input BEFORE
30 | -- indicating they're no longer Ready (have received
31 | -- the data) AND confirm they've fully flushed the
32 | -- input buffer AND the output NULL BEFORE indicating
33 | -- they ARE ready:  spurious mis-estimates of completion
34 | -- are fatal. 
35 | library IEEE;
36 | use IEEE.std_logic_1164.all;
37 | library async_ncl;
38 | use async_ncl.ncl.all;
39 | 
40 | entity e_ncl_handshake_sender is
41 |     generic (n : positive);
42 |     port (
43 |         -- Ready line coming back from the receiver
44 |         Ready   : in  std_logic;
45 |         -- Output data; only observed here, hence mode "in"
46 |         Dout    : in  ncl_logic_vector(n - 1 downto 0);
47 |         Waiting : out std_logic  -- Waiting signal
48 |     );
49 | end entity e_ncl_handshake_sender;
50 | 
51 | library IEEE;
52 | use IEEE.std_logic_1164.all;
53 | library async_ncl;
54 | use async_ncl.ncl.all;
55 | 
56 | entity e_ncl_handshake_receiver is
57 |     port (
58 |         Ready   : out std_logic;
59 |         -- Enable "Ready" output
60 |         En      : in  std_logic;
61 |         Waiting : in  std_logic;
62 |         EnOut   : out std_logic
63 |     );
64 | end entity e_ncl_handshake_receiver;
65 | 
66 | architecture ncl_handshake_sender of e_ncl_handshake_sender is
67 |     signal data_complete : std_logic;  -- '1' when no bit of Dout is NULL
68 |     signal data_flushed  : std_logic;  -- '1' when every rail of Dout is '0'
69 |     signal rail_active   : std_logic_vector(Dout'RANGE);
70 | begin
71 |     -- Track when outgoing data is all not null
72 |     data_complete <= NOT (OR ncl_is_null(Dout));
73 | 
74 |     -- Track when absolutely every outgoing data LINE is '0'
75 |     G1: for i in Dout'RANGE generate
76 |         rail_active(i) <= Dout(i).H OR Dout(i).L;
77 |     end generate G1;
78 |     data_flushed  <= NOT (OR rail_active);
79 | 
80 |     -- Raise Waiting when the receiver is Ready AND our data lines are
81 |     -- complete.
82 |     --
83 |     -- Once raised, keep signaling data is waiting until our data lines
84 |     -- are flushed: the hold term must use NOT data_flushed.  Holding on
85 |     -- data_flushed itself dropped Waiting as soon as Ready fell while
86 |     -- data was still on the bus, and latched Waiting high on a flushed bus.
87 |     -- Circuit must NOT alter incoming data UNTIL the READY signal is dropped!
88 |     Waiting <=    (Ready   AND data_complete)
89 |                OR (Waiting AND NOT data_flushed);
90 | end ncl_handshake_sender;
91 | 
92 | architecture ncl_handshake_receiver of e_ncl_handshake_receiver is
93 | begin
94 |     -- En should be 1 when ready for new data; Waiting MUST only go from
95 |     -- 0 to 1 while Ready is being sent.  Ready: both Waiting cases, gated by En.
96 |     Ready <= (En AND (NOT Waiting)) OR (En AND Waiting);
97 |     EnOut <= En AND Waiting AND Ready;
98 | end architecture ncl_handshake_receiver;
99 | 


--------------------------------------------------------------------------------
/RISC-V.srcs/asynchronous/infrastructure/handshake/ncl_async_logical_not.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrmoserbaltimore/risc-v-cpu-asynchronous/3c0864c1b023da5a7b3475c484f0aca1b9310e09/RISC-V.srcs/asynchronous/infrastructure/handshake/ncl_async_logical_not.png


--------------------------------------------------------------------------------
/RISC-V.srcs/asynchronous/infrastructure/handshake/ncl_async_register.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrmoserbaltimore/risc-v-cpu-asynchronous/3c0864c1b023da5a7b3475c484f0aca1b9310e09/RISC-V.srcs/asynchronous/infrastructure/handshake/ncl_async_register.png


--------------------------------------------------------------------------------
/RISC-V.srcs/asynchronous/infrastructure/handshake/register.vhdl:
--------------------------------------------------------------------------------
  1 | -- vim: sw=4 ts=4 et
  2 | -- NCL Buffer Register
  3 | --
  4 | -- INTEGRATION, the VLSI Journal, 59 (2017), 31-41,
  5 | -- doi 10.1016/j.vlsi.2017.05.002
  6 | -- "Simple Method of Asynchronous Circuits Implementation
  7 | -- in Commercial FPGAs" by Zbigniew Hajduk
  8 | --
  9 | -- The article above describes an Asynchronous Pipeline
 10 | -- Register (APR) as a single-ended data bus fed into a
 11 | -- network of two comparators, two flip-flops, AND gates,
 12 | -- inverters, and three multiplexers.
 13 | --
 14 | -- That design is not suitable for NULL convention logic.
 15 | -- Instead, we use a simpler overall circuit with two D
 16 | -- Flip-Flops, two Comparators, an N-Completion circuit,
 17 | -- and three AND gates.  This circuit specifically
 18 | -- interfaces with our handshake protocol and allows
 19 | -- reliable register storage without clock.
 20 | --
 21 | -- In a sane world, this would be defined in async_ncl,
 22 | -- and we could either have an architecture instantiate
 23 | -- a component defined in the async_ncl package as
 24 | -- being the entity(architecture) pair.  VHDL as-is
 25 | -- is analogous to pinouts being defined and sockets
 26 | -- being sold, but nobody sells the chips or any
 27 | -- design for the chips, so you have to make it yourself.  
 28 | library IEEE;
 29 | use IEEE.std_logic_1164.all;
 30 | library async_ncl;
 31 | use async_ncl.ncl.all;
 32 | 
 33 | entity e_ncl_latch is
 34 |     port (D   : in  ncl_logic;   -- value to capture
 35 |           EN  : in  std_logic;   -- Q takes D while this is set
 36 |           CLR : in  std_logic;   -- forces Q to NULL; tested before EN
 37 |           Q   : out ncl_logic    -- held value
 38 |     );
 39 | end entity e_ncl_latch;
 40 | 
 41 | architecture ncl_latch of e_ncl_latch is
 42 |     constant NCL_NULL : ncl_logic := (L => '0', H => '0');
 43 | begin
 44 |     -- No assignment is made when neither CLR nor EN is active.
 45 |     latch: process(all)
 46 |     begin
 47 |         -- CLR and EN together is not a valid request; CLR is tested first.
 48 |         if (?? CLR) then
 49 |             Q <= NCL_NULL;  -- NULL regardless of D
 50 |         elsif (?? EN) then
 51 |             Q <= D;
 52 |         end if;
 53 |     end process latch;
 54 | end architecture ncl_latch;
 55 | 
 56 | library IEEE;
 57 | use IEEE.std_logic_1164.all;
 58 | library async_ncl;
 59 | use async_ncl.ncl.all;
 60 | -- Registered logic for wide bus
 61 | --
 62 | -- When R<='1' and D is NCL-complete, EN is activated.
 63 | --
 64 | -- When D=Q and D is NCL-complete and W='1', Stored is asserted.
 65 | entity e_ncl_logic_register is
 66 |     generic (n : positive);  -- register width in NCL bits
 67 |     port (
 68 |         D          : in  ncl_logic_vector(n - 1 downto 0);
 69 |         -- W is the receiver-side Waiting (in) line.
 70 |         -- EN should usually come from the sender handshake.
 71 |         EN, W, CLR : in  std_logic;
 72 |         Q          : out ncl_logic_vector(n - 1 downto 0);
 73 |         Stored     : out std_logic
 74 |     );
 75 | end entity e_ncl_logic_register;
 76 | 
 77 | use work.e_ncl_latch;
 78 | library async_ncl;
 79 | use async_ncl.ncl.all;
 80 | -- n-bit delay-insensitive asynchronous register
 81 | architecture ncl_logic_register of e_ncl_logic_register is
 82 |     -- On when time to enable the latch array
 83 |     signal en_latch         : std_logic;
 84 | begin
 85 |     -- One latch per bit of D.  The range comes from D itself: D and Q are
 86 |     -- (n-1 downto 0), so counting from n would index one past the bus.
 87 |     latches: for i in D'RANGE generate
 88 |         latch: entity e_ncl_latch(ncl_latch)
 89 |         port map    ( D      => D(i),
 90 |                       EN     => en_latch,
 91 |                       CLR    => CLR,
 92 |                       Q      => Q(i));
 93 |     end generate;
 94 |     -- Handshake protocol allows data in at all times, but only stores it when
 95 |     -- R=1, and only sets R=0 when W=1 AND the data has been stored (i.e. once
 96 |     -- the data-in lines can change without impacting the circuit, R <= 0 ).
 97 |     --
 98 |     -- CLR should never be '1' while R is '1'; however, it is intended to keep
 99 |     -- R = '0' until Q reads full NULL, then set CLR <= '0' and R <= '1'.  This
100 |     -- creates a glitch wherein R propagates more slowly than CLR, so we
101 |     -- ignore R when CLR is set.
102 |     --
103 |     -- Adding AND NOT Stored would tend to cut off EN just slightly earlier:
104 |     -- Stored <= '1' has to propagate for R <= '0', which has to propagate
105 |     -- through the AND gate to set EN <= '0'.
106 |     en_latch <= '1' when EN AND (NOT CLR) else
107 |                 '0';
108 | 
109 |     -- Stored: D is complete, Q has caught up with D, and W is '1'.
110 |     -- D and Q must be distinct signals
111 |     Stored <= '1' when (NOT ncl_is_null(D)) AND (D = Q) AND W = '1' else
112 |               '0';
113 | end ncl_logic_register;
114 | 


--------------------------------------------------------------------------------
/RISC-V.srcs/asynchronous/infrastructure/ncl/README.md:
--------------------------------------------------------------------------------
  1 | Delay-insensitive encoding
  2 | ===========================
  3 | 
  4 | Asynchronous components use one-hot delay-insensitive encoding internally.  In
  5 | standard TTL, a 32-bit data bus has 32 wires; in dual-rail one-hot encoding,
  6 | the same data bus has 64 wires. Each pair carries one bit in one-hot encoding:
  7 | ```
  8 | Dx0    Dx1    Value
  9 |   0      0    NULL
 10 |   1      0    0
 11 |   0      1    1
 12 |   1      1    Halt and catch fire
 13 | ```
 14 | Each step internally must encode in this way to ensure detectable completion.
 15 | The final output reaches a completion-detection circuit which then signals
 16 | completion.
 17 | 
 18 | The single-bit encoder looks as such:
 19 | ```
 20 | INPUT
 21 |   |----
 22 |  NOT   |
 23 |   |    |
 24 |  dx0  dx1
 25 | ```
 26 | The dx1 bit is just the input, while the dx0 bit is the input inverted, as
 27 | shown in the encoding table above.  Note that `1`-bits propagate slightly
 28 | faster than `0` bits if the NOT introduces delay.
 29 | 
 30 | A `0` input gives `[1 0]`, while a `1` gives `[0 1]`.  While switching from
 31 | `1` to `0` is flawless, a glitch occurs switching from `0` to `1`:  the `1`
 32 | can propagate and produce `[1 1]` outputs.  To handle this glitch, validation
 33 | circuits must treat `[1 1]` identically to `[0 0]`, which is easy: `A XOR B`
 34 | gives `1` if valid and `0` if `NULL` or invalid, so take `A XOR B = 1` as
 35 | completion.
 36 | 
 37 | The single-bit decoder looks as such:
 38 | ```
 39 | Theoretical    Optimized
 40 | INPUT          INPUT
 41 |  | |----       |   |
 42 |  XOR    |          | 
 43 |   |     |        OUTPUT
 44 |    ----AND
 45 |         |
 46 |       OUTPUT
 47 | ```
 48 | The optimized decoder uses no gates.  In either case, the component using
 49 | the decoder must validate completion before considering the output valid. 
 50 | 
 51 | Consider a two-bit adder, as below:
 52 | ```
 53 | INPUT:  A1  B1       A0  B0
 54 |         0   1        1   1
 55 |         |   |        |   |
 56 |       [Encoder]    [Encoder]
 57 |        1 0 0 1      0 1 0 1
 58 |        | | | |      | | | |
 59 |      [Full Adder]-0[Half Adder] (Cout=[0 1]=1)
 60 |      [          ]-1[          ]
 61 |        0 1 1 0       1 0        (Carry=[0 1], S1=[1 0], S0=[1 0])
 62 | *      | | | |       | |
 63 |       [Decoder]    [Decoder]
 64 | OUTPUT:  1 0         0
 65 | ```
 66 | The input is two binary values, `A=01` and `B=11`.  The encoder encodes these
 67 | to `A=[10 01]` and `B=[01 01]`.  The adders themselves also encode in this
 68 | manner (note this is a ripple-carry adder).
 69 | 
 70 | The outputs go to a decoder, which asserts 00 on the output and 1 as the carry
 71 | bit.  Note that adding 1 to 0b11 overflows and produces 0 and a carry bit.
 72 | 
 73 | The six output lines (marked `*`) also drive a gate tree as follows:
 74 | ```
 75 | *      | | | |       | |
 76 |        XOR XOR       XOR Ready[In]--(INPUT)
 77 |          | |           | |
 78 |          AND           AND
 79 |           |             |
 80 |            -----AND-----
 81 |                  |
 82 |             Waiting[Out]
 83 | ```
 84 | Note the convention here:  on NULL `[0 0]`, the XOR gates output nothing, and
 85 | the component does not assert Waiting.  Because `[1 1]` is an invalid state,
 86 | OR gates also work rather than XOR; an XOR gate causes halt on invalid
 87 | encoding, while OR causes invalid output.
 88 | 
 89 | Also note Waiting[Out] is delayed by the gate delay of an XOR gate and *two*
 90 | AND gates, while the assertion of a `1` bit through the decoder is delayed
 91 | by an XOR gate and *one* AND gate.  For single-bit output, the delay is the
 92 | same as a `1` bit decode:
 93 | ```
 94 |  | |
 95 |  XOR Ready[In]--(INPUT)
 96 |    | |
 97 |    AND
 98 |     |
 99 | Waiting[Out]
100 | ```
101 | In general, for `n` bits of output, the delay to assert Waiting[Out] when
102 | Ready[In] is asserted and all data lines are available is one level of XOR
103 | gates plus `log(n+1,2)` levels of AND gates.
104 | 
105 | When Ready[In] becomes `0`, Waiting[Out] automatically becomes `0`.
106 | 
107 | The component can be made to also not pass Ready[Out] until all outputs read
108 | `[0 0]` and Waiting[In] reads `0`:
109 | ```
110 | *      | | | |       | |
111 |        NOR NOR       NOR Waiting[In]--(INPUT)
112 |          | |           |       |
113 |          AND           AND----NOT
114 |           |             |
115 |            -----AND-----
116 |                  |
117 |              Ready[Out]
118 | ```
119 | In this way, the component asserts Waiting[Out] when all outputs are ready,
120 | and asserts Ready[Out] when all outputs are cleared and Waiting[in] is not
121 | asserted.
122 | 
123 | These assertions should be latched and reset at appropriate times.  For
124 | example: Ready[Out] must remain asserted until the component is no longer
125 | affected by state changes on the data bus.
126 | 
127 | When two components are both asynchronous, there is no reason to encode and
128 | decode the output between them.  For example, the binary adder above may be
129 | connected to an instruction pipeline which itself encodes and decodes the
130 | data, or which itself only interfaces with asynchronous components.  In such
131 | a case, the decoding delay is zero and the validation delay is greater than
132 | zero.  Such a configuration would need to convert when leaving its domain,
133 | such as when sending data to memory or peripherals.
134 | 


--------------------------------------------------------------------------------
/RISC-V.srcs/asynchronous/infrastructure/ncl/ncl.vhdl:
--------------------------------------------------------------------------------
  1 | -- vim: sw=4 ts=4 et
  2 | -- 2 hot 2 handle
  3 | --
  4 | -- An NCL bit is basically two rail one-hot specified as such:
  5 | --
  6 | --   d : std_logic_vector(1 downto 0);
  7 | --
  8 | --   d = "00" -- NULL
  9 | --   d = "10" -- 0, note d(1) = '1', d(0) = '0'
 10 | --   d = "01" -- 1
 11 | --
 12 | -- Our NCL implementation operates as follows:
 13 | --
 14 | --   entity foo is
 15 | --     port( d : (in) ncl_logic(7 downto 0) );
 16 | --   end foo;
 17 | --
 18 | -- d(0)(L) will give the low bit, d(0)(H) will give the high bit,
 19 | -- on data bit 0.
 20 | library IEEE;
 21 | use IEEE.std_logic_1164.all;
 22 | 
 23 | package ncl is
 24 |     type ncl_logic is record
 25 |         L : std_logic;  -- "low bit" of the pair
 26 |         H : std_logic;  -- "high bit" of the pair
 27 |     end record ncl_logic;
 28 |     -- Bus of NCL bits over a natural index range
 29 |     type ncl_logic_vector is array (natural range <>) of ncl_logic;
 30 | 
 31 |     -- NULL tests, for one bit or a bus, as std_logic(_vector) or boolean
 32 |     function ncl_is_null (d : ncl_logic)        return std_logic;
 33 |     function ncl_is_null (d : ncl_logic_vector) return std_logic_vector;
 34 |     function ncl_is_null (d : ncl_logic)        return boolean;
 35 |     function ncl_is_null (d : ncl_logic_vector) return boolean;
 36 |     -- Conversion from and to std_logic
 37 |     function ncl_encode  (d : std_logic)        return ncl_logic;
 38 |     function ncl_encode  (d : std_logic_vector) return ncl_logic_vector;
 39 |     function ncl_decode  (d : ncl_logic)        return std_logic;
 40 |     function ncl_decode  (d : ncl_logic_vector) return std_logic_vector;
 41 |     -- Logic operators on a single NCL bit
 42 |     function "and"  (l, r : ncl_logic) return ncl_logic;
 43 |     function "nand" (l, r : ncl_logic) return ncl_logic;
 44 |     function "or"   (l, r : ncl_logic) return ncl_logic;
 45 |     function "nor"  (l, r : ncl_logic) return ncl_logic;
 46 |     function "xor"  (l, r : ncl_logic) return ncl_logic;
 47 |     function "xnor" (l, r : ncl_logic) return ncl_logic;
 48 |     function "not"  (l    : ncl_logic) return ncl_logic;
 49 |     -- The same operator names overloaded for whole buses
 50 |     function "and"  (l, r : ncl_logic_vector) return ncl_logic_vector;
 51 |     function "nand" (l, r : ncl_logic_vector) return ncl_logic_vector;
 52 |     function "or"   (l, r : ncl_logic_vector) return ncl_logic_vector;
 53 |     function "nor"  (l, r : ncl_logic_vector) return ncl_logic_vector;
 54 |     function "xor"  (l, r : ncl_logic_vector) return ncl_logic_vector;
 55 |     function "xnor" (l, r : ncl_logic_vector) return ncl_logic_vector;
 56 |     function "not"  (l    : ncl_logic_vector) return ncl_logic_vector;
 57 |     -- Equality against another NCL bit or against a std_logic value
 58 |     function "="    (l, r : ncl_logic) return boolean;
 59 |     function "="    (l : ncl_logic; r : std_logic) return boolean;
 60 | end package ncl;
 61 | 
 62 | package body ncl is
 63 |     -- returns the glitch "11" as NULL as well
 64 |     function ncl_is_null(d: ncl_logic) return std_logic is
 65 |     begin
 66 |         return d.H XNOR d.L;
 67 |     end function;
 68 | 
 69 |     function ncl_is_null(d : ncl_logic_vector) return std_logic_vector is
 70 |         variable dout : std_logic_vector(d'RANGE);
 71 |     begin
 72 |         for i in d'RANGE loop
 73 |             dout(i) := ncl_is_null(d(i));
 74 |         end loop;
 75 |         return dout;
 76 |     end function;
 77 | 
 78 |     function ncl_is_null(d: ncl_logic) return boolean is
 79 |     begin
 80 |         -- Any result that's not '1' is not non-NULL
 81 |         return (d.H XNOR d.L) /= '1';
 82 |     end function;
 83 |     
 84 |     function ncl_is_null(d : ncl_logic_vector) return boolean is
 85 |     begin
 86 |         for i in d'RANGE loop
 87 |             -- True if anything in here is null
 88 |             if (ncl_is_null(d(i))) then
 89 |                 return true;
 90 |             end if;
 91 |         end loop;
 92 |         return false;
 93 |     end function;
 94 |     
 95 |     function ncl_encode (d : std_logic) return ncl_logic is
 96 |     begin
 97 |         return (H => NOT d, L => d);
 98 |     end function;
 99 | 
100 |     function ncl_encode (d : std_logic_vector) return ncl_logic_vector is
101 |         variable dout : ncl_logic_vector(d'RANGE);
102 |     begin
103 |         for i in d'RANGE loop
104 |             dout(i) := ncl_encode(d(i));
105 |         end loop;
106 |         return dout;
107 |     end function;
108 | 
109 |     -- In NCL, the low bit represents the value and the high
110 |     -- bit is the inverse of the value.
111 |     function ncl_decode (d : ncl_logic) return std_logic is
112 |     begin
113 |         -- Invalid, can't decode.
114 |         -- Can't read this reliably, so check BEFORE decoding!
115 |         if (ncl_is_null(d)) then
116 |             return 'U';
117 |         end if;
118 |         return (d.L);
119 |     end function;
120 | 
121 |     function ncl_decode (d : ncl_logic_vector) return std_logic_vector is
122 |         variable dout : std_logic_vector(d'RANGE);
123 |     begin
124 |         for i in d'RANGE loop
125 |             dout(i) := ncl_decode(d(i));
126 |         end loop;
127 |         return dout;
128 |     end function;
129 | 
130 |     -- For all logical functions, the low bit is the logical
131 |     -- operator applied to the low bits, and the high bit is
132 |     -- the inverse applied to the high bits (or the low bit
133 |     -- inverted).
134 |     --
135 |     -- If either is NULL, return NULL.
136 |     
137 |     -- The AND circuit should look like this:
138 |     --
139 |     -- AH AL-   -BL BH
140 |     --  | |  | |  | |
141 |     --  XOR  | |  XOR
142 |     --   |   | |   |
143 |     --   |   AND   |
144 |     --   |   | |   |
145 |     --   | NOT |   |
146 |     --   |   | |   |
147 |     --    ---|-|-AND
148 |     --       | |   |
149 |     --      -|-|---+
150 |     --     | | |   |
151 |     --     AND AND-
152 |     --       | |
153 |     --      OH OL
154 |     --
155 |     function "and" (l, r : ncl_logic) return ncl_logic is
156 |     begin
157 |         if (ncl_is_null(l) OR ncl_is_null(r)) then
158 |             return (H=>'0', L=>'0');
159 |         end if;
160 |         return ncl_encode(l.L AND r.L);
161 |     end function;
162 | 
163 |     function "nand" (l, r : ncl_logic) return ncl_logic is
164 |     begin
165 |         if (ncl_is_null(l) OR ncl_is_null(r)) then
166 |             return (H=>'0', L=>'0');
167 |         end if;
168 |         return ncl_encode(l.L NAND r.L);
169 |     end function;
170 | 
171 |     function "or" (l, r : ncl_logic) return ncl_logic is
172 |     begin
173 |         if (ncl_is_null(l) OR ncl_is_null(r)) then
174 |             return (H=>'0', L=>'0');
175 |         end if;
176 |         return ncl_encode(l.L OR r.L);
177 |     end function;
178 | 
179 |     function "nor" (l, r : ncl_logic) return ncl_logic is
180 |     begin
181 |         if (ncl_is_null(l) OR ncl_is_null(r)) then
182 |             return (H=>'0', L=>'0');
183 |         end if;
184 |         return ncl_encode(l.L NOR r.L);
185 |     end function;
186 | 
187 |     function "xor" (l, r : ncl_logic) return ncl_logic is
188 |     begin
189 |         if (ncl_is_null(l) OR ncl_is_null(r)) then
190 |             return (H=>'0', L=>'0');
191 |         end if;
192 |         return ncl_encode(l.L XOR r.L);
193 |     end function;
194 | 
195 |     function "xnor" (l, r : ncl_logic) return ncl_logic is
196 |     begin
197 |         if (ncl_is_null(l) OR ncl_is_null(r)) then
198 |             return (H=>'0', L=>'0');
199 |         end if;
200 |         return ncl_encode(l.L XNOR r.L);
201 |     end function;
202 | 
203 |     -- The inverter is special:  just swap the signals.
204 |     function "not" (l    : ncl_logic) return ncl_logic is
205 |     begin
206 |             return (H=>l.L, L=>l.H);
207 |     end function;
208 | 
209 |     -- Above functions on arrays
210 |     function "and" (l, r : ncl_logic_vector) return ncl_logic_vector is
211 |         variable dout : ncl_logic_vector(l'RANGE);
212 |     begin
213 |         for i in l'RANGE loop
214 |             dout(i) := l(i) AND r(i);
215 |         end loop;
216 |         return dout;
217 |     end function;
218 | 
219 |     function "nand" (l, r : ncl_logic_vector) return ncl_logic_vector is
220 |         variable dout : ncl_logic_vector(l'RANGE);
221 |     begin
222 |         for i in l'RANGE loop
223 |             dout(i) := l(i) NAND r(i);
224 |         end loop;
225 |         return dout;
226 |     end function;
227 | 
228 |     function "or" (l, r : ncl_logic_vector) return ncl_logic_vector is
229 |         variable dout : ncl_logic_vector(l'RANGE);
230 |     begin
231 |         for i in l'RANGE loop
232 |             dout(i) := l(i) OR r(i);
233 |         end loop;
234 |         return dout;
235 |     end function;
236 | 
237 |     function "nor" (l, r : ncl_logic_vector) return ncl_logic_vector is
238 |         variable dout : ncl_logic_vector(l'RANGE);
239 |     begin
240 |         for i in l'RANGE loop
241 |             dout(i) := l(i) NOR r(i);
242 |         end loop;
243 |         return dout;
244 |     end function;
245 | 
246 |     function "xor" (l, r : ncl_logic_vector) return ncl_logic_vector is
247 |         variable dout : ncl_logic_vector(l'RANGE);
248 |     begin
249 |         for i in l'RANGE loop
250 |             dout(i) := l(i) XOR r(i);
251 |         end loop;
252 |         return dout;
253 |     end function;
254 | 
255 |     function "xnor" (l, r : ncl_logic_vector) return ncl_logic_vector is
256 |         variable dout : ncl_logic_vector(l'RANGE);
257 |     begin
258 |         for i in l'RANGE loop
259 |             dout(i) := l(i) XNOR r(i);
260 |         end loop;
261 |         return dout;
262 |     end function;
263 | 
264 |     function "not" (l    : ncl_logic_vector) return ncl_logic_vector is
265 |         variable dout : ncl_logic_vector(l'RANGE);
266 |     begin
267 |         for i in l'RANGE loop
268 |             dout(i) := NOT l(i);
269 |         end loop;
270 |         return dout;
271 |     end function;
272 | 
273 |     -- Comparators
274 |     function "="    (l, r: ncl_logic) return boolean is
275 |     begin
276 |         if (ncl_is_null(l) or ncl_is_null(r) or (l.L /= r.L)) then
277 |             return false;
278 |         end if;
279 |         return true;
280 |     end function;
281 | 
282 |     function "="    (l: ncl_logic; r: std_logic) return boolean is
283 |     begin
284 |         if (ncl_is_null(l) or (l.L /= r)) then
285 |             return false;
286 |         end if;
287 |         return true;
288 |     end function;
289 | 
290 | end package body;
291 | 


--------------------------------------------------------------------------------
/RISC-V.srcs/asynchronous/infrastructure/transceiver/transceiver_async_to_sync.vhdl:
--------------------------------------------------------------------------------
 1 | -- vim: sw=4 ts=4 et
 2 | -- Synchronous-Asynchronous Transceiver 
 3 | --
 4 | -- Connects to/from a synchronous interface.
 5 | --
 6 | -- Async to Sync
 7 | --
 8 | -- Theory of operation:
 9 | --
10 | -- An asynchronous interface recognizes when data is ready and negotiates
11 | -- completion state continuously.  Synchronous circuits rely on a clock
12 | -- timed to at least the delay of the circuits.
13 | --
14 | -- An asynchronous interface is delay-insensitive and can wait forever for
15 | -- input or for a neighboring circuit to be ready to receive input.  As
16 | -- such, any asynchronous circuit can synchronize to a clock and interface
17 | -- with other asynchronous circuits via the asynchronous protocol, only
18 | -- experiencing additional delay.
19 | --
20 | -- Interfacing between asynchronous and synchronous circuits only requires
21 | -- an asynchronous circuit clocked to the synchronous circuit.  The client
22 | -- circuits only connect to their respective interfaces, thus translating
23 | -- between the two.
24 | library IEEE;
25 | use IEEE.std_logic_1164.all;
26 | use work.ncl.all;
27 | 
-- Asynchronous (NCL) to synchronous transceiver interface.
entity e_transceiver_async_to_sync is
    generic (
        -- Number of data bits carried across the boundary.
        n : positive
    );
    port (
        -- Clock of the synchronous side.
        clk  : in  std_logic;
        -- Dual-rail data arriving from the asynchronous side.
        din  : in  ncl_logic_vector(n-1 downto 0);
        -- Single-rail data presented to the synchronous side.
        dout : out std_logic_vector(n-1 downto 0);
        -- Write signal
        wr   : out std_logic
    );
end entity e_transceiver_async_to_sync;
38 | 
39 | -- FIXME:  these need a complete transceiver architecture with
40 | -- an appropriate handshake.
41 | 
architecture transceiver_async_to_sync of e_transceiver_async_to_sync is
begin
    -- Rising edge: if din is not reported as NULL by ncl_is_null, latch
    -- its decoded value onto dout and raise wr.  When din is NULL on a
    -- rising edge nothing changes.
    -- Falling edge: drop wr again; dout keeps its last value.
    capture : process (clk)
    begin
        if rising_edge(clk) then
            if not ncl_is_null(din) then
                dout <= ncl_decode(din);
                wr   <= '1';
            end if;
        elsif falling_edge(clk) then
            wr <= '0';
        end if;
    end process capture;
end architecture transceiver_async_to_sync;
54 | 


--------------------------------------------------------------------------------
/RISC-V.srcs/asynchronous/infrastructure/transceiver/transceiver_sync_to_async.vhdl:
--------------------------------------------------------------------------------
 1 | -- vim: sw=4 ts=4 et
 2 | -- Synchronous-Asynchronous Transceiver 
 3 | --
 4 | -- Connects to/from a synchronous interface.
 5 | --
 6 | -- Theory of operation:
 7 | --
 8 | -- An asynchronous interface recognizes when data is ready and negotiates
 9 | -- completion state continuously.  Synchronous circuits rely on a clock
10 | -- timed to at least the delay of the circuits.
11 | --
12 | -- An asynchronous interface is delay-insensitive and can wait forever for
13 | -- input or for a neighboring circuit to be ready to receive input.  As
14 | -- such, any asynchronous circuit can synchronize to a clock and interface
15 | -- with other asynchronous circuits via the asynchronous protocol, only
16 | -- experiencing additional delay.
17 | --
18 | -- Interfacing between asynchronous and synchronous circuits only requires
19 | -- an asynchronous circuit clocked to the synchronous circuit.  The client
20 | -- circuits only connect to their respective interfaces, thus translating
21 | -- between the two.
22 | library IEEE;
23 | use IEEE.std_logic_1164.all;
24 | use work.ncl.all;
25 | 
-- Synchronous to asynchronous (NCL) transceiver interface.
entity e_transceiver_sync_to_async is
    generic (
        -- Number of data bits carried across the boundary.
        n : positive
    );
    port (
        -- Clock of the synchronous side.
        clk  : in  std_logic;
        -- Single-rail data supplied by the synchronous side.
        din  : in  std_logic_vector(n-1 downto 0);
        -- Dual-rail data driven towards the asynchronous side.
        dout : out ncl_logic_vector(n-1 downto 0)
    );
end entity e_transceiver_sync_to_async;
34 | 
35 | -- FIXME:  these need a complete transceiver architecture with
36 | -- an appropriate handshake.
architecture transceiver_sync_to_async of e_transceiver_sync_to_async is
begin
    -- Alternates data and NULL on dout, one of each per clock period:
    -- the rising edge drives the dual-rail encoding of din, the falling
    -- edge drives NULL (both rails low) on every bit.
    drive : process (clk)
    begin
        if falling_edge(clk) then
            -- Send null
            dout <= (others => (L => '0', H => '0'));
        elsif rising_edge(clk) then
            dout <= ncl_encode(din);
        end if;
    end process drive;
end architecture transceiver_sync_to_async;


--------------------------------------------------------------------------------
/RISC-V.xpr:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="UTF-8"?>
  2 | <!-- Product Version: Vivado v2019.2.1 (64-bit)              -->
  3 | <!--                                                         -->
  4 | <!-- Copyright 1986-2019 Xilinx, Inc. All Rights Reserved.   -->
  5 | 
  6 | <Project Version="7" Minor="44" Path="C:/Users/john/Documents/FPGA/Xilinx/RISC-V/RISC-V.xpr">
  7 |   <DefaultLaunch Dir="$PRUNDIR"/>
  8 |   <Configuration>
  9 |     <Option Name="Id" Val="69f00ae8120c401bbc7755728a4e9c60"/>
 10 |     <Option Name="Part" Val="xc7z020clg400-1"/>
 11 |     <Option Name="CompiledLibDir" Val="$PCACHEDIR/compile_simlib"/>
 12 |     <Option Name="CompiledLibDirXSim" Val=""/>
 13 |     <Option Name="CompiledLibDirModelSim" Val="$PCACHEDIR/compile_simlib/modelsim"/>
 14 |     <Option Name="CompiledLibDirQuesta" Val="$PCACHEDIR/compile_simlib/questa"/>
 15 |     <Option Name="CompiledLibDirIES" Val="$PCACHEDIR/compile_simlib/ies"/>
 16 |     <Option Name="CompiledLibDirXcelium" Val="$PCACHEDIR/compile_simlib/xcelium"/>
 17 |     <Option Name="CompiledLibDirVCS" Val="$PCACHEDIR/compile_simlib/vcs"/>
 18 |     <Option Name="CompiledLibDirRiviera" Val="$PCACHEDIR/compile_simlib/riviera"/>
 19 |     <Option Name="CompiledLibDirActivehdl" Val="$PCACHEDIR/compile_simlib/activehdl"/>
 20 |     <Option Name="TargetLanguage" Val="VHDL"/>
 21 |     <Option Name="BoardPart" Val="digilentinc.com:arty-z7-20:part0:1.0"/>
 22 |     <Option Name="BoardPartRepoPaths" Val="$PPRDIR/../../../../AppData/Roaming/Xilinx/Vivado/2019.2/xhub/board_store"/>
 23 |     <Option Name="ActiveSimSet" Val="sim_1"/>
 24 |     <Option Name="DefaultLib" Val="xil_defaultlib"/>
 25 |     <Option Name="ProjectType" Val="Default"/>
 26 |     <Option Name="IPOutputRepo" Val="$PCACHEDIR/ip"/>
 27 |     <Option Name="IPCachePermission" Val="read"/>
 28 |     <Option Name="IPCachePermission" Val="write"/>
 29 |     <Option Name="EnableCoreContainer" Val="FALSE"/>
 30 |     <Option Name="CreateRefXciForCoreContainers" Val="FALSE"/>
 31 |     <Option Name="IPUserFilesDir" Val="$PIPUSERFILESDIR"/>
 32 |     <Option Name="IPStaticSourceDir" Val="$PIPUSERFILESDIR/ipstatic"/>
 33 |     <Option Name="EnableBDX" Val="FALSE"/>
 34 |     <Option Name="DSABoardId" Val="arty-z7-20"/>
 35 |     <Option Name="WTXSimLaunchSim" Val="0"/>
 36 |     <Option Name="WTModelSimLaunchSim" Val="0"/>
 37 |     <Option Name="WTQuestaLaunchSim" Val="0"/>
 38 |     <Option Name="WTIesLaunchSim" Val="0"/>
 39 |     <Option Name="WTVcsLaunchSim" Val="0"/>
 40 |     <Option Name="WTRivieraLaunchSim" Val="0"/>
 41 |     <Option Name="WTActivehdlLaunchSim" Val="0"/>
 42 |     <Option Name="WTXSimExportSim" Val="0"/>
 43 |     <Option Name="WTModelSimExportSim" Val="0"/>
 44 |     <Option Name="WTQuestaExportSim" Val="0"/>
 45 |     <Option Name="WTIesExportSim" Val="0"/>
 46 |     <Option Name="WTVcsExportSim" Val="0"/>
 47 |     <Option Name="WTRivieraExportSim" Val="0"/>
 48 |     <Option Name="WTActivehdlExportSim" Val="0"/>
 49 |     <Option Name="GenerateIPUpgradeLog" Val="TRUE"/>
 50 |     <Option Name="XSimRadix" Val="hex"/>
 51 |     <Option Name="XSimTimeUnit" Val="ns"/>
 52 |     <Option Name="XSimArrayDisplayLimit" Val="1024"/>
 53 |     <Option Name="XSimTraceLimit" Val="65536"/>
 54 |     <Option Name="SimTypes" Val="rtl"/>
 55 |     <Option Name="SimTypes" Val="bfm"/>
 56 |     <Option Name="SimTypes" Val="tlm"/>
 57 |     <Option Name="SimTypes" Val="tlm_dpi"/>
 58 |     <Option Name="MEMEnableMemoryMapGeneration" Val="TRUE"/>
 59 |     <Option Name="DcpsUptoDate" Val="TRUE"/>
 60 |   </Configuration>
 61 |   <FileSets Version="1" Minor="31">
 62 |     <FileSet Name="sources_1" Type="DesignSrcs" RelSrcDir="$PSRCDIR/sources_1">
 63 |       <Filter Type="Srcs"/>
 64 |       <File Path="$PSRCDIR/asynchronous/infrastructure/ncl/ncl.vhdl">
 65 |         <FileInfo SFType="VHDL2008">
 66 |           <Attr Name="Library" Val="async_ncl"/>
 67 |           <Attr Name="UsedIn" Val="synthesis"/>
 68 |           <Attr Name="UsedIn" Val="simulation"/>
 69 |         </FileInfo>
 70 |       </File>
 71 |       <File Path="$PSRCDIR/asynchronous/infrastructure/handshake/register.vhdl">
 72 |         <FileInfo SFType="VHDL2008">
 73 |           <Attr Name="Library" Val="xil_defaultlib"/>
 74 |           <Attr Name="UsedIn" Val="synthesis"/>
 75 |           <Attr Name="UsedIn" Val="simulation"/>
 76 |         </FileInfo>
 77 |       </File>
 78 |       <File Path="$PSRCDIR/asynchronous/infrastructure/handshake/handshake.vhdl">
 79 |         <FileInfo SFType="VHDL2008">
 80 |           <Attr Name="Library" Val="xil_defaultlib"/>
 81 |           <Attr Name="UsedIn" Val="synthesis"/>
 82 |           <Attr Name="UsedIn" Val="simulation"/>
 83 |         </FileInfo>
 84 |       </File>
 85 |       <File Path="$PSRCDIR/asynchronous/cpu/ALU/insn/insn_two_register.vhdl">
 86 |         <FileInfo SFType="VHDL2008">
 87 |           <Attr Name="UsedIn" Val="synthesis"/>
 88 |           <Attr Name="UsedIn" Val="simulation"/>
 89 |         </FileInfo>
 90 |       </File>
 91 |       <File Path="$PSRCDIR/asynchronous/cpu/ALU/I/and.vhdl">
 92 |         <FileInfo SFType="VHDL2008">
 93 |           <Attr Name="UsedIn" Val="synthesis"/>
 94 |           <Attr Name="UsedIn" Val="simulation"/>
 95 |         </FileInfo>
 96 |       </File>
 97 |       <File Path="$PSRCDIR/asynchronous/cpu/adders/adder.vhdl">
 98 |         <FileInfo SFType="VHDL2008">
 99 |           <Attr Name="AutoDisabled" Val="1"/>
100 |           <Attr Name="UsedIn" Val="synthesis"/>
101 |           <Attr Name="UsedIn" Val="simulation"/>
102 |         </FileInfo>
103 |       </File>
104 |       <File Path="$PSRCDIR/asynchronous/cpu/shifters/barrel_shifter_no_signex.vhdl">
105 |         <FileInfo SFType="VHDL2008">
106 |           <Attr Name="AutoDisabled" Val="1"/>
107 |           <Attr Name="UsedIn" Val="synthesis"/>
108 |           <Attr Name="UsedIn" Val="simulation"/>
109 |         </FileInfo>
110 |       </File>
111 |       <File Path="$PSRCDIR/asynchronous/infrastructure/transceiver/transceiver_async_to_sync.vhdl">
112 |         <FileInfo SFType="VHDL2008">
113 |           <Attr Name="Library" Val="xil_defaultlib"/>
114 |           <Attr Name="AutoDisabled" Val="1"/>
115 |           <Attr Name="UsedIn" Val="synthesis"/>
116 |           <Attr Name="UsedIn" Val="simulation"/>
117 |         </FileInfo>
118 |       </File>
119 |       <File Path="$PSRCDIR/asynchronous/infrastructure/transceiver/transceiver_sync_to_async.vhdl">
120 |         <FileInfo SFType="VHDL2008">
121 |           <Attr Name="Library" Val="xil_defaultlib"/>
122 |           <Attr Name="AutoDisabled" Val="1"/>
123 |           <Attr Name="UsedIn" Val="synthesis"/>
124 |           <Attr Name="UsedIn" Val="simulation"/>
125 |         </FileInfo>
126 |       </File>
127 |       <File Path="$PSRCDIR/asynchronous/cpu/shifters/barrel_shifter.vhdl">
128 |         <FileInfo SFType="VHDL2008">
129 |           <Attr Name="AutoDisabled" Val="1"/>
130 |           <Attr Name="UsedIn" Val="synthesis"/>
131 |           <Attr Name="UsedIn" Val="simulation"/>
132 |         </FileInfo>
133 |       </File>
134 |       <File Path="$PSRCDIR/asynchronous/cpu/adders/shcadder.vhdl">
135 |         <FileInfo SFType="VHDL2008">
136 |           <Attr Name="AutoDisabled" Val="1"/>
137 |           <Attr Name="UsedIn" Val="synthesis"/>
138 |           <Attr Name="UsedIn" Val="simulation"/>
139 |         </FileInfo>
140 |       </File>
141 |       <File Path="$PSRCDIR/asynchronous/cpu/ALU/I/shift.vhdl">
142 |         <FileInfo SFType="VHDL2008">
143 |           <Attr Name="AutoDisabled" Val="1"/>
144 |           <Attr Name="UsedIn" Val="synthesis"/>
145 |           <Attr Name="UsedIn" Val="simulation"/>
146 |         </FileInfo>
147 |       </File>
148 |       <File Path="$PSRCDIR/asynchronous/cpu/pipelines/simple-pipeline.vhdl">
149 |         <FileInfo SFType="VHDL2008">
150 |           <Attr Name="AutoDisabled" Val="1"/>
151 |           <Attr Name="UsedIn" Val="synthesis"/>
152 |           <Attr Name="UsedIn" Val="simulation"/>
153 |         </FileInfo>
154 |       </File>
155 |       <Config>
156 |         <Option Name="DesignMode" Val="RTL"/>
157 |         <Option Name="TopModule" Val="e_riscv_insn_async_2reg"/>
158 |         <Option Name="TopAutoSet" Val="TRUE"/>
159 |       </Config>
160 |     </FileSet>
161 |     <FileSet Name="constrs_1" Type="Constrs" RelSrcDir="$PSRCDIR/constrs_1">
162 |       <Filter Type="Constrs"/>
163 |       <Config>
164 |         <Option Name="ConstrsType" Val="XDC"/>
165 |       </Config>
166 |     </FileSet>
167 |     <FileSet Name="sim_1" Type="SimulationSrcs" RelSrcDir="$PSRCDIR/sim_1">
168 |       <Filter Type="Srcs"/>
169 |       <Config>
170 |         <Option Name="DesignMode" Val="RTL"/>
171 |         <Option Name="TopModule" Val="e_riscv_insn_async_2reg"/>
172 |         <Option Name="TopLib" Val="xil_defaultlib"/>
173 |         <Option Name="TopAutoSet" Val="TRUE"/>
174 |         <Option Name="TransportPathDelay" Val="0"/>
175 |         <Option Name="TransportIntDelay" Val="0"/>
176 |         <Option Name="SelectedSimModel" Val="rtl"/>
177 |         <Option Name="SrcSet" Val="sources_1"/>
178 |       </Config>
179 |     </FileSet>
180 |     <FileSet Name="utils_1" Type="Utils" RelSrcDir="$PSRCDIR/utils_1">
181 |       <Filter Type="Utils"/>
182 |       <Config>
183 |         <Option Name="TopAutoSet" Val="TRUE"/>
184 |       </Config>
185 |     </FileSet>
186 |   </FileSets>
187 |   <Simulators>
188 |     <Simulator Name="XSim">
189 |       <Option Name="Description" Val="Vivado Simulator"/>
190 |       <Option Name="CompiledLib" Val="0"/>
191 |     </Simulator>
192 |     <Simulator Name="ModelSim">
193 |       <Option Name="Description" Val="ModelSim Simulator"/>
194 |     </Simulator>
195 |     <Simulator Name="Questa">
196 |       <Option Name="Description" Val="Questa Advanced Simulator"/>
197 |     </Simulator>
198 |     <Simulator Name="Riviera">
199 |       <Option Name="Description" Val="Riviera-PRO Simulator"/>
200 |     </Simulator>
201 |     <Simulator Name="ActiveHDL">
202 |       <Option Name="Description" Val="Active-HDL Simulator"/>
203 |     </Simulator>
204 |   </Simulators>
205 |   <Runs Version="1" Minor="11">
206 |     <Run Id="synth_1" Type="Ft3:Synth" SrcSet="sources_1" Part="xc7z020clg400-1" ConstrsSet="constrs_1" Description="Vivado Synthesis Defaults" AutoIncrementalCheckpoint="false" WriteIncrSynthDcp="false" State="current" IncludeInArchive="true">
207 |       <Strategy Version="1" Minor="2">
208 |         <StratHandle Name="Vivado Synthesis Defaults" Flow="Vivado Synthesis 2019"/>
209 |         <Step Id="synth_design"/>
210 |       </Strategy>
211 |       <ReportStrategy Name="Vivado Synthesis Default Reports" Flow="Vivado Synthesis 2019"/>
212 |       <Report Name="ROUTE_DESIGN.REPORT_METHODOLOGY" Enabled="1"/>
213 |       <RQSFiles/>
214 |     </Run>
215 |     <Run Id="impl_1" Type="Ft2:EntireDesign" Part="xc7z020clg400-1" ConstrsSet="constrs_1" Description="Default settings for Implementation." AutoIncrementalCheckpoint="false" WriteIncrSynthDcp="false" State="current" SynthRun="synth_1" IncludeInArchive="true" GenFullBitstream="true">
216 |       <Strategy Version="1" Minor="2">
217 |         <StratHandle Name="Vivado Implementation Defaults" Flow="Vivado Implementation 2019"/>
218 |         <Step Id="init_design"/>
219 |         <Step Id="opt_design"/>
220 |         <Step Id="power_opt_design"/>
221 |         <Step Id="place_design"/>
222 |         <Step Id="post_place_power_opt_design"/>
223 |         <Step Id="phys_opt_design" EnableStepBool="1"/>
224 |         <Step Id="route_design"/>
225 |         <Step Id="post_route_phys_opt_design"/>
226 |         <Step Id="write_bitstream"/>
227 |       </Strategy>
228 |       <ReportStrategy Name="Vivado Implementation Default Reports" Flow="Vivado Implementation 2019"/>
229 |       <Report Name="ROUTE_DESIGN.REPORT_METHODOLOGY" Enabled="1"/>
230 |       <RQSFiles/>
231 |     </Run>
232 |   </Runs>
233 |   <Board>
234 |     <Jumpers/>
235 |   </Board>
236 |   <DashboardSummary Version="1" Minor="0">
237 |     <Dashboards>
238 |       <Dashboard Name="default_dashboard">
239 |         <Gadgets>
240 |           <Gadget Name="drc_1" Type="drc" Version="1" Row="2" Column="0">
241 |             <GadgetParam Name="REPORTS" Type="string_list" Value="impl_1#impl_1_route_report_drc_0 "/>
242 |           </Gadget>
243 |           <Gadget Name="methodology_1" Type="methodology" Version="1" Row="2" Column="1">
244 |             <GadgetParam Name="REPORTS" Type="string_list" Value="impl_1#impl_1_route_report_methodology_0 "/>
245 |           </Gadget>
246 |           <Gadget Name="power_1" Type="power" Version="1" Row="1" Column="0">
247 |             <GadgetParam Name="REPORTS" Type="string_list" Value="impl_1#impl_1_route_report_power_0 "/>
248 |           </Gadget>
249 |           <Gadget Name="timing_1" Type="timing" Version="1" Row="0" Column="1">
250 |             <GadgetParam Name="REPORTS" Type="string_list" Value="impl_1#impl_1_route_report_timing_summary_0 "/>
251 |           </Gadget>
252 |           <Gadget Name="utilization_1" Type="utilization" Version="1" Row="0" Column="0">
253 |             <GadgetParam Name="REPORTS" Type="string_list" Value="synth_1#synth_1_synth_report_utilization_0 "/>
254 |             <GadgetParam Name="RUN.STEP" Type="string" Value="synth_design"/>
255 |             <GadgetParam Name="RUN.TYPE" Type="string" Value="synthesis"/>
256 |           </Gadget>
257 |           <Gadget Name="utilization_2" Type="utilization" Version="1" Row="1" Column="1">
258 |             <GadgetParam Name="REPORTS" Type="string_list" Value="impl_1#impl_1_place_report_utilization_0 "/>
259 |           </Gadget>
260 |         </Gadgets>
261 |       </Dashboard>
262 |       <CurrentDashboard>default_dashboard</CurrentDashboard>
263 |     </Dashboards>
264 |   </DashboardSummary>
265 | </Project>
266 | 


--------------------------------------------------------------------------------