From fd8f3bc1f04c3f683e6a5fa893504252363f2fbd Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Fri, 5 Jan 2024 15:35:35 +0100 Subject: [PATCH] add simdAdd example doc --- source/VexiiRiscv/Execute/custom.rst | 239 +++++++++++++++++++++++ source/VexiiRiscv/Execute/index.rst | 152 +------------- source/VexiiRiscv/Execute/plugins.rst | 140 +++++++++++++ source/VexiiRiscv/Introduction/index.rst | 1 - 4 files changed, 383 insertions(+), 149 deletions(-) create mode 100644 source/VexiiRiscv/Execute/custom.rst create mode 100644 source/VexiiRiscv/Execute/plugins.rst diff --git a/source/VexiiRiscv/Execute/custom.rst b/source/VexiiRiscv/Execute/custom.rst new file mode 100644 index 0000000..2d4e86a --- /dev/null +++ b/source/VexiiRiscv/Execute/custom.rst @@ -0,0 +1,239 @@ +Custom instruction +============================== + +There are multiple ways you can add custom instructions into VexiiRiscv. The following chapter will provide some demo. + +SIMD add +----------- + +Let's define a plugin which will implement a SIMD add (4x8bits adder), working on the integer register file. + +The plugin will be based on the ExecutionUnitElementSimple which makes implementing ALU plugins simpler. Such a plugin can then be used to compose a given execution lane layer + +For instance the Plugin configuration could be : + +.. code:: scala + + plugins += new SrcPlugin(early0, executeAt = 0, relaxedRs = relaxedSrc) + plugins += new IntAluPlugin(early0, formatAt = 0) + plugins += new BarrelShifterPlugin(early0, formatAt = relaxedShift.toInt) + plugins += new IntFormatPlugin("lane0") + plugins += new BranchPlugin(early0, aluAt = 0, jumpAt = relaxedBranch.toInt, wbAt = 0) + plugins += new SimdAddPlugin(early0) // <- We will implement this plugin + +Plugin implementation +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Here is a example how this plugin could be implemented : + +- https://github.com/SpinalHDL/VexiiRiscv/blob/dev/src/main/scala/vexiiriscv/execute/SimdAddPlugin.scala + +.. 
code:: scala + + package vexiiriscv.execute + + import spinal.core._ + import spinal.lib._ + import spinal.lib.pipeline.Stageable + import vexiiriscv.Generate.args + import vexiiriscv.{Global, ParamSimple, VexiiRiscv} + import vexiiriscv.compat.MultiPortWritesSymplifier + import vexiiriscv.riscv.{IntRegFile, RS1, RS2, Riscv} + + //This plugin example will add a new instruction named SIMD_ADD which does the following : + // + //RD : Regfile Destination, RS : Regfile Source + //RD( 7 downto 0) = RS1( 7 downto 0) + RS2( 7 downto 0) + //RD(15 downto 8) = RS1(15 downto 8) + RS2(15 downto 8) + //RD(23 downto 16) = RS1(23 downto 16) + RS2(23 downto 16) + //RD(31 downto 24) = RS1(31 downto 24) + RS2(31 downto 24) + // + //Instruction encoding : + //0000000----------000-----0001011 <- Custom0 func3=0 func7=0 + // |RS2||RS1| |RD | + // + //Note : RS1, RS2, RD positions follow the RISC-V spec and are common for all instructions of the ISA + + + object SimdAddPlugin{ + //Define the instruction type and encoding that we will use + val ADD4 = IntRegFile.TypeR(M"0000000----------000-----0001011") + } + + //ExecutionUnitElementSimple is a plugin base class which will integrate itself in an execute lane layer + //It provides quite a few utilities to ease the implementation of custom instructions. + //Here we will implement a plugin which provides SIMD add on the register file. + class SimdAddPlugin(val layer : LaneLayer) extends ExecutionUnitElementSimple(layer) { + + //Here we create an elaboration thread. The Logic class is provided by ExecutionUnitElementSimple to provide functionalities + val logic = during setup new Logic { + //Here we could have locked the elaboration of some other plugins (ex CSR), but here we don't need any of that + //as all is already sorted out in the Logic base class. 
+ //So we just wait for the build phase + awaitBuild() + + //Let's assume we only support RV32 for now + assert(Riscv.XLEN.get == 32) + + //Let's get the hardware interface that we will use to provide the result of our custom instruction + val wb = newWriteback(ifp, 0) + + //Specify that the current plugin will implement the ADD4 instruction + val add4 = add(SimdAddPlugin.ADD4).spec + + //We need to specify on which stage we start using the register file values + add4.addRsSpec(RS1, executeAt = 0) + add4.addRsSpec(RS2, executeAt = 0) + + //Now that we are done specifying everything about the instructions, we can release the Logic.uopRetainer + //This will allow a few other plugins to continue their elaboration (ex : decoder, dispatcher, ...) + uopRetainer.release() + + //Let's define some logic in the execute lane [0] + val process = new el.Execute(id = 0) { + //Get the RISC-V RS1/RS2 values from the register file + val rs1 = el(IntRegFile, RS1).asUInt + val rs2 = el(IntRegFile, RS2).asUInt + + //Do some computation + val rd = UInt(32 bits) + rd( 7 downto 0) := rs1( 7 downto 0) + rs2( 7 downto 0) + rd(15 downto 8) := rs1(15 downto 8) + rs2(15 downto 8) + rd(23 downto 16) := rs1(23 downto 16) + rs2(23 downto 16) + rd(31 downto 24) := rs1(31 downto 24) + rs2(31 downto 24) + + //Provide the computation value for the writeback + wb.valid := SEL + wb.payload := rd.asBits + } + } + } + + +VexiiRiscv generation +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Then, to generate a VexiiRiscv with this new plugin, we could run the following App : + +- Bottom of https://github.com/SpinalHDL/VexiiRiscv/blob/dev/src/main/scala/vexiiriscv/execute/SimdAddPlugin.scala + +.. 
code:: scala + + object VexiiSimdAddGen extends App { + val param = new ParamSimple() + val sc = SpinalConfig() + + assert(new scopt.OptionParser[Unit]("VexiiRiscv") { + help("help").text("prints this usage text") + param.addOptions(this) + }.parse(args, Unit).nonEmpty) + + sc.addTransformationPhase(new MultiPortWritesSymplifier) + val report = sc.generateVerilog { + val pa = param.pluginsArea() + pa.plugins += new SimdAddPlugin(pa.early0) + VexiiRiscv(pa.plugins) + } + } + + +To run this App, you can go to the VexiiRiscv directory and run : + +.. code:: shell + + sbt "runMain vexiiriscv.execute.VexiiSimdAddGen" + +Software test +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Then let's write some assembly test code : (https://github.com/SpinalHDL/NaxSoftware/tree/849679c70b238ceee021bdfd18eb2e9809e7bdd0/baremetal/simdAdd) + +.. code:: shell + + .globl _start + _start: + + #include "../../driver/riscv_asm.h" + #include "../../driver/sim_asm.h" + #include "../../driver/custom_asm.h" + + //Test 1 + li x1, 0x01234567 + li x2, 0x01FF01FF + opcode_R(CUSTOM0, 0x0, 0x00, x3, x1, x2) //x3 = ADD4(x1, x2) + + //Print result value + li x4, PUT_HEX + sw x3, 0(x4) + + //Check result + li x5, 0x02224666 + bne x3, x5, fail + + j pass + + pass: + j pass + fail: + j fail + +Compile it with + +.. code:: shell + + make clean rv32im + +Simulation +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +You could run a simulation using this testbench : + +- Bottom of https://github.com/SpinalHDL/VexiiRiscv/blob/dev/src/main/scala/vexiiriscv/execute/SimdAddPlugin.scala + +.. 
code:: scala + + object VexiiSimdAddSim extends App{ + val param = new ParamSimple() + val testOpt = new TestOptions() + + val genConfig = SpinalConfig() + genConfig.includeSimulation + + val simConfig = SpinalSimConfig() + simConfig.withFstWave + simConfig.withTestFolder + simConfig.withConfig(genConfig) + + assert(new scopt.OptionParser[Unit]("VexiiRiscv") { + help("help").text("prints this usage text") + testOpt.addOptions(this) + param.addOptions(this) + }.parse(args, Unit).nonEmpty) + + println(s"With Vexiiriscv parm :\n - ${param.getName()}") + val compiled = simConfig.compile { + val pa = param.pluginsArea() + pa.plugins += new SimdAddPlugin(pa.early0) + VexiiRiscv(pa.plugins) + } + testOpt.test(compiled) + } + +Which can be run with : + +.. code:: shell + + sbt "runMain vexiiriscv.execute.VexiiSimdAddSim --load-elf ext/NaxSoftware/baremetal/simdAdd/build/rv32ima/simdAdd.elf --trace-all --no-rvls-check" + + +Which will output the value 02224666 in the shell and show traces in simWorkspace/VexiiRiscv/test :D + +Note that --no-rvls-check is required as spike do not implement that custom simdAdd. + +Conclusion +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +So overall this example didn't introduce how to specify some additional decoding, nor how to define multi-cycle ALU. (TODO). +But you can take a look in the IntAluPlugin, ShiftPlugin, DivPlugin, MulPlugin and BranchPlugin which are doing those things using the same ExecutionUnitElementSimple base class. + + diff --git a/source/VexiiRiscv/Execute/index.rst b/source/VexiiRiscv/Execute/index.rst index 14e066c..0b982c7 100644 --- a/source/VexiiRiscv/Execute/index.rst +++ b/source/VexiiRiscv/Execute/index.rst @@ -1,153 +1,9 @@ Execute ============ -Many plugins operate in the fetch stage. Some provide infrastructures : -- ExecutePipelinePlugin -- ExecuteLanePlugin -- RegFilePlugin -- SrcPlugin -- RsUnsignedPlugin -- IntFormatPlugin -- WriteBackPlugin -- LearnPlugin +.. 
toctree:: + :maxdepth: 2 -Some implement regular instructions - -- IntAluPlugin -- BarrelShifterPlugin -- BranchPlugin -- MulPlugin -- DivPlugin -- LsuCachelessPlugin - -Some implement CSR, privileges and special instructions - -- CsrAccessPlugin -- CsrRamPlugin -- PrivilegedPlugin -- PerformanceCounterPlugin -- EnvPlugin - - -ExecutePipelinePlugin ------------------------ - -Provide the pipeline framework for all the execute related hardware with the following specificities : - -- It is based on the spinal.lib.misc.pipeline API and can host multiple "lanes" in it. -- For flow control, the lanes can only freeze the whole pipeline -- The pipeline do not collapse bubbles (empty stages) - - -ExecuteLanePlugin ------------------------ - -Implement an execution lane in the ExecutePipelinePlugin - -RegFilePlugin ------------------------ - -Implement one register file, with the possibility to create new read / write port on demande - -SrcPlugin ------------------------ - -Provide some early integer values which can mux between RS1/RS2 and multiple RISC-V instruction's literal values - -RsUnsignedPlugin ------------------------ - -Used by mul/div in order to get an unsigned RS1/RS2 value early in the pipeline - -IntFormatPlugin ------------------------ - -Alows plugins to write integer values back to the register file through a optional sign extender. -It uses WriteBackPlugin as value backend. - -WriteBackPlugin ------------------------ - -Used by plugins to provide the RD value to write back to the register file - -LearnPlugin ----------------- - -Will collect all interface which provide jump/branch learning interfaces to aggregate them into a single one, which will then be used by branch prediction plugins to learn. - -IntAluPlugin ------------------------ - -Implement the arithmetic, binary and literal instructions (ADD, SUB, AND, OR, LUI, ...) - -BarrelShifterPlugin ------------------------ - -Implement the shift instructions in a non-blocking way (no iterations). 
Fast but "heavy". - -BranchPlugin ------------------------ - -Will : - -- Implement branch/jump instruction -- Correct the PC / History in the case the branch prediction was wrong -- Provide a learn interface to the LearnPlugin - - -MulPlugin ------------------------ - -- Implement multiplication operation using partial multiplications and then summing their result -- Done over multiple stage -- Can optionaly extends the last stage for one cycle in order to buffer the MULH bits - -DivPlugin ------------------------ - -- Implement the division/remain -- 2 bits per cycle are solved. -- When it start, it scan for the numerator leading bits for 0, and can skip dividing them (can skip blocks of XLEN/4) - -LsuCachelessPlugin ------------------------ - -- Implement load / store through a cacheless memory bus -- Will fork the cmd as soon as fork stage is valid (with no flush) -- Handle backpresure by using a little fifo on the response data - -CsrAccessPlugin ------------------------ - -- Implement the CSR instruction -- Provide an API for other plugins to specify its hardware mapping - -CsrRamPlugin ------------------------ - -- Implement a shared on chip ram -- Provide an API which allows to staticaly allocate space on it -- Provide an API to create read / write ports on it -- Used by various plugins to store the CSR contents in a FPGA efficient way - -PrivilegedPlugin ------------------------ - -- Implement the RISCV privileged spec -- Implement the trap buffer / FSM -- Use the CsrRamPlugin to implement various CSR as MTVAL, MTVEC, MEPC, MSCRATCH, ... 
- -PerformanceCounterPlugin --------------------------------- - -- Implement the privileged performance counters in a very FPGA way -- Use the CsrRamPlugin to store most of the counter bits -- Use a dedicated 7 bits hardware register per counter -- Once that 7 bits register MSB is set, a FSM will flush it into the CsrRamPlugin - - -EnvPlugin ------------------------- - -- Implement a few instructions as MRET, SRET, ECALL, EBREAK + plugins + custom diff --git a/source/VexiiRiscv/Execute/plugins.rst b/source/VexiiRiscv/Execute/plugins.rst new file mode 100644 index 0000000..dbc59a6 --- /dev/null +++ b/source/VexiiRiscv/Execute/plugins.rst @@ -0,0 +1,140 @@ +Plugins +============ + + +Infrastructures +------------------- + +Many plugins operate in the execute stage. Some provide infrastructures : + +ExecutePipelinePlugin +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Provide the pipeline framework for all the execute related hardware with the following specificities : + +- It is based on the spinal.lib.misc.pipeline API and can host multiple "lanes" in it. +- For flow control, the lanes can only freeze the whole pipeline +- The pipeline does not collapse bubbles (empty stages) + + +ExecuteLanePlugin +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Implement an execution lane in the ExecutePipelinePlugin + +RegFilePlugin +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Implement one register file, with the possibility to create new read / write ports on demand + +SrcPlugin +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Provide some early integer values which can mux between RS1/RS2 and multiple RISC-V instruction's literal values + +RsUnsignedPlugin +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Used by mul/div in order to get an unsigned RS1/RS2 value early in the pipeline + +IntFormatPlugin +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Allows plugins to write integer values back to the register file through an optional sign extender. +It uses WriteBackPlugin as value backend. 
+ +WriteBackPlugin +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Used by plugins to provide the RD value to write back to the register file + +LearnPlugin +^^^^^^^^^^^^^^^^^^^^^^^^ + +Will collect all interfaces which provide jump/branch learning interfaces to aggregate them into a single one, which will then be used by branch prediction plugins to learn. + +Instructions +------------------- + +Some implement regular instructions + +IntAluPlugin +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Implement the arithmetic, binary and literal instructions (ADD, SUB, AND, OR, LUI, ...) + +BarrelShifterPlugin +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Implement the shift instructions in a non-blocking way (no iterations). Fast but "heavy". + +BranchPlugin +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Will : + +- Implement branch/jump instruction +- Correct the PC / History in the case the branch prediction was wrong +- Provide a learn interface to the LearnPlugin + + +MulPlugin +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- Implement multiplication operation using partial multiplications and then summing their result +- Done over multiple stages +- Can optionally extend the last stage for one cycle in order to buffer the MULH bits + +DivPlugin +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- Implement the division/remainder +- 2 bits per cycle are solved. 
+- When it starts, it scans the numerator leading bits for 0, and can skip dividing them (can skip blocks of XLEN/4) + +LsuCachelessPlugin +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- Implement load / store through a cacheless memory bus +- Will fork the cmd as soon as fork stage is valid (with no flush) +- Handle backpressure by using a little fifo on the response data + +Special +------------------- + +Some implement CSR, privileges and special instructions + +CsrAccessPlugin +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- Implement the CSR instruction +- Provide an API for other plugins to specify its hardware mapping + +CsrRamPlugin +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- Implement a shared on chip ram +- Provide an API which allows to statically allocate space on it +- Provide an API to create read / write ports on it +- Used by various plugins to store the CSR contents in a FPGA efficient way + +PrivilegedPlugin +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- Implement the RISCV privileged spec +- Implement the trap buffer / FSM +- Use the CsrRamPlugin to implement various CSR such as MTVAL, MTVEC, MEPC, MSCRATCH, ... + +PerformanceCounterPlugin +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- Implement the privileged performance counters in a very FPGA way +- Use the CsrRamPlugin to store most of the counter bits +- Use a dedicated 7 bits hardware register per counter +- Once that 7 bits register MSB is set, a FSM will flush it into the CsrRamPlugin + + +EnvPlugin +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- Implement a few instructions such as MRET, SRET, ECALL, EBREAK diff --git a/source/VexiiRiscv/Introduction/index.rst b/source/VexiiRiscv/Introduction/index.rst index 702e30c..afc9e66 100644 --- a/source/VexiiRiscv/Introduction/index.rst +++ b/source/VexiiRiscv/Introduction/index.rst @@ -1,7 +1,6 @@ Introduction ============ -Miaouuu .. toctree:: :maxdepth: 1