Skip to content

Commit

Permalink
add write 0s to bitstream for end-to-end; add sim func to write 0s to last rows and cols
Browse files Browse the repository at this point in the history
  • Loading branch information
yuchen-mei committed Sep 13, 2024
1 parent 0c58f38 commit 7ea9427
Show file tree
Hide file tree
Showing 4 changed files with 158 additions and 7 deletions.
33 changes: 30 additions & 3 deletions garnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -601,13 +601,40 @@ def fix_pond_flush_bug(self, placement, routing):
bitstream += self.interconnect.get_node_bitstream_config(source_node, dest_node)
return bitstream

def write_zero_to_config_regs(self, bitstream):
    """Append (addr, 0) writes that zero every named interconnect config register.

    Fix for the Onyx pond hardware: stale configuration left over from a
    previous layer can trigger random pond flushes, so all interconnect
    config registers are explicitly cleared before the real bitstream is
    programmed.

    Args:
        bitstream: list of (addr, data) tuples; extended in place.
    """
    # Imported lazily to avoid a hard dependency at module import time.
    from gemstone.common.configurable import ConfigRegister

    for loc, tile in self.interconnect.tile_circuits.items():
        # Tile-level address components are loop-invariant; compute once.
        tile_id = self.interconnect.get_tile_id(*loc)
        tile_id_width = tile.tile_id_width
        slice_start = tile.feature_config_slice.start
        # enumerate gives the feature's position directly instead of an
        # O(n) features().index(feature) lookup per feature.
        for feature_addr, feature in enumerate(tile.features()):
            # Only features with an instance name belong to the
            # interconnect and get zeroed here.
            if not feature.instance_name:
                continue
            for child in feature.children():
                if isinstance(child, ConfigRegister):
                    addr = (
                        tile_id
                        | (child.addr << slice_start)
                        | (feature_addr << tile_id_width)
                    )
                    bitstream.append((addr, 0))

def generate_bitstream(self, halide_src, placement, routing, id_to_name, instance_to_instr, netlist, bus,
compact=False):
compact=False, end_to_end=True):
routing_fix = archipelago.power.reduce_switching(routing, self.interconnect,
compact=compact)
routing.update(routing_fix)

bitstream = []
if end_to_end: self.write_zero_to_config_regs(bitstream)
bitstream += self.interconnect.get_route_bitstream(routing)
bitstream += self.fix_pond_flush_bug(placement, routing)
bitstream += self.get_placement_bitstream(placement, id_to_name,
Expand All @@ -616,7 +643,7 @@ def generate_bitstream(self, halide_src, placement, routing, id_to_name, instanc
skip_addr = self.interconnect.get_skip_addr()
bitstream = compress_config_data(bitstream, skip_compression=skip_addr)
inputs, outputs = self.get_input_output(netlist)
input_interface, output_interface,\
input_interface, output_interface, \
(reset, valid, en) = self.get_io_interface(inputs,
outputs,
placement,
Expand Down
65 changes: 61 additions & 4 deletions global_buffer/io_placement.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,65 @@
import re
import os
import json


def atoi(text):
    """Return *text* as an int when it is all digits, otherwise unchanged."""
    if text.isdigit():
        return int(text)
    return text

def natural_keys(text):
return [ atoi(c) for c in re.split(r'(\d+)', text) ]

def natural_keys(text):
    """Sort key for human/natural ordering: digit runs compare numerically.

    Splits *text* on runs of digits and converts each run to an int, so
    e.g. "io2" sorts before "io10".
    """
    parts = re.split(r'(\d+)', text)
    return [int(part) if part.isdigit() else part for part in parts]

def parse_glb_bank_config(app_dir, id_to_name, inputs, outputs, valid, placement):
    """Place IO blocks at the GLB bank columns listed in glb_bank_config.json.

    The JSON file maps a substring of each block's name ("inputs"/"outputs"
    sections) to a list of bank x-coordinates; blocks are matched to
    coordinates in order of appearance. Each output also places its paired
    valid signal (valid[idx]) at the same coordinate.

    Args:
        app_dir: directory containing glb_bank_config.json.
        id_to_name: block id -> block name mapping.
        inputs: input block ids.
        outputs: output block ids.
        valid: valid-signal ids, parallel to ``outputs``.
        placement: block id -> (x, y) dict, updated in place.

    Returns:
        The updated ``placement`` dict.
    """
    with open(os.path.join(app_dir, "glb_bank_config.json"), "r") as f:
        glb_json = json.load(f)

    # --- inputs: match each block to the next coordinate of its type ---
    input_types = glb_json["inputs"].keys()
    inputs_dict = {input_type: [] for input_type in input_types}
    index_counters = {input_type: 0 for input_type in input_types}

    for input_blk_id in inputs:
        input_blk_name = id_to_name[input_blk_id]
        # First configured type whose name is a substring of the block name.
        type_name = next((t for t in input_types if t in input_blk_name), None)
        if type_name:
            dict_idx = index_counters[type_name]
            coordinate = (glb_json["inputs"][type_name][dict_idx], 0)
            inputs_dict[type_name].append({input_blk_id: coordinate})
            index_counters[type_name] += 1

    # --- outputs: each output also places its paired valid signal ---
    output_types = glb_json["outputs"].keys()
    outputs_dict = {output_type: [] for output_type in output_types}
    index_counters = {output_type: 0 for output_type in output_types}

    for idx, output_blk_id in enumerate(outputs):
        output_blk_name = id_to_name[output_blk_id]
        type_name = next((t for t in output_types if t in output_blk_name), None)
        if type_name:
            dict_idx = index_counters[type_name]
            coordinate = (glb_json["outputs"][type_name][dict_idx], 0)
            outputs_dict[type_name].append({output_blk_id: coordinate})
            outputs_dict[type_name].append({valid[idx]: coordinate})
            index_counters[type_name] += 1

    # Assert that all the inputs and outputs have been placed
    assert sum(len(coords) for coords in inputs_dict.values()) == len(inputs), "Inputs in glb_bank_config.json do not match the number of inputs in the design"
    # Each output contributed two entries (data + valid), hence the // 2.
    assert sum(len(coords) for coords in outputs_dict.values()) // 2 == len(outputs), "Outputs in glb_bank_config.json do not match the number of outputs in the design"

    # Fold the collected coordinates into the placement dictionary.
    for coord_list in inputs_dict.values():
        for coord_dict in coord_list:
            placement.update(coord_dict)
    for coord_list in outputs_dict.values():
        for coord_dict in coord_list:
            placement.update(coord_dict)
    return placement

def place_io_blk(id_to_name, app_dir):
"""Hacky function to place the IO blocks"""
Expand All @@ -21,7 +74,7 @@ def place_io_blk(id_to_name, app_dir):
# Human sort thing from Kalhan used in GLB scripts
id_to_name_list.sort(key=lambda x: natural_keys(x[1]))

blks = [blk for (blk,_) in id_to_name_list]
blks = [blk for (blk, _) in id_to_name_list]

placement = {}
# find out all the IO blocks
Expand All @@ -31,7 +84,6 @@ def place_io_blk(id_to_name, app_dir):
assert blk_id not in ios
ios.append(blk_id)


# need to know if it's an input or output

reset = None
Expand Down Expand Up @@ -92,4 +144,9 @@ def place_io_blk(id_to_name, app_dir):
for dat in data:
name, x, y = tuple(dat.split(" "))
placement[name] = (x.strip(), y.strip())

# parse the glb_bank_config.json to specify bank locations
if os.path.isfile(app_dir + "/glb_bank_config.json"):
placement = parse_glb_bank_config(app_dir, id_to_name, inputs, outputs, valid, placement)

return placement
11 changes: 11 additions & 0 deletions tests/test_app/tb/environment.sv
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,17 @@ task Environment::read_data(Kernel kernel);
// "Hierarchical reference to a structure array member connected to task ref-port is not supported"
// proc_drv.read_data(kernel.outputs[i].io_tiles[j].start_addr,
// kernel.outputs[i].io_tiles[j].io_block_data);

// $display("[%s] clear last rows and columns of output_%0d_block_%0d", kernel.name, i, j);
// // Note that C should be channel number per GLB tile
// proc_drv.clear_last_rows_and_columns(
// kernel.outputs[i].io_tiles[j].start_addr,
// 4,
// 56,
// 56,
// 1
// );

data_q = new[kernel.outputs[i].io_tiles[j].io_block_data.size()];
proc_drv.read_data(kernel.outputs[i].io_tiles[j].start_addr, data_q);
kernel.outputs[i].io_tiles[j].io_block_data = data_q;
Expand Down
56 changes: 56 additions & 0 deletions tests/test_app/tb/proc_driver.sv
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ class ProcDriver;
extern task write_data(int start_addr, data_array_t data_q);
extern task write(int addr, bit [BANK_DATA_WIDTH-1:0] data);
extern task read_data(int start_addr, ref data_array_t data_q);
extern task write_byte(int addr, bit [BANK_DATA_WIDTH-1:0] data, int byte_offset);
extern task clear_last_rows_and_columns(int start_addr, int C, int X, int Y, int trunc_size);
endclass

function ProcDriver::new(vProcIfcDriver vif, semaphore proc_lock);
Expand Down Expand Up @@ -103,3 +105,57 @@ task ProcDriver::read_data(int start_addr, ref data_array_t data_q);
repeat (10) @(vif.cbd);
proc_lock.put(1);
endtask

// Write a single 16-bit word of `data` into the 64-bit bank word at `addr`,
// using the write strobe so that only the two bytes at positions
// `byte_offset` and `byte_offset + 1` are modified; the other bytes of the
// bank word are left untouched. Drives the proc interface for one clocking
// event, then deasserts all signals.
task ProcDriver::write_byte(int addr, bit [BANK_DATA_WIDTH-1:0] data, int byte_offset);
    bit [7:0] wr_strb; // Strobe for byte-level control

    // Calculate the write strobe based on the byte offset
    wr_strb = 8'b0; // Clear all strobe bits
    wr_strb[byte_offset] = 1'b1; // Set strobe for the lower byte of the desired 16-bit word
    wr_strb[byte_offset + 1] = 1'b1; // Set strobe for the upper byte of the desired 16-bit word

    // Drive one strobed write through the clocking block.
    vif.cbd.wr_en <= 1'b1;
    vif.cbd.wr_strb <= wr_strb;
    vif.cbd.wr_addr <= addr;
    vif.cbd.wr_data <= data;
    @(vif.cbd);
    // Deassert everything so the bus returns to idle after the write.
    vif.cbd.wr_en <= 0;
    vif.cbd.wr_strb <= 0;
    vif.cbd.wr_addr <= 0;
    vif.cbd.wr_data <= 0;
endtask

// Zero out the last `trunc_size` rows and the last `trunc_size` columns of a
// C-channel X-by-Y image stored at `start_addr` in channel-interleaved order
// (address = (y * X * C + x * C + ch) words). Each element is cleared with a
// strobed byte write so neighboring channels packed in the same 64-bit bank
// word are preserved. Takes and releases the shared proc bus lock.
// NOTE(review): cells in the overlapping bottom-right corner are written
// twice (once by the row pass, once by the column pass) — harmless since
// both writes are zero.
task ProcDriver::clear_last_rows_and_columns(int start_addr, int C, int X, int Y, int trunc_size);
    bit [GLB_ADDR_WIDTH-1:0] cur_addr;
    bit [BANK_DATA_WIDTH-1:0] zero_data = 0;
    int ch, x, y;
    int byte_offset; // Byte offset for writing

    proc_lock.get(1); // serialize access to the proc bus
    // The strobe math below assumes a 64-bit bank word holding four 16-bit
    // CGRA words (CGRA_BYTE_OFFSET == 1, i.e. 2 bytes per word).
    assert (BANK_DATA_WIDTH == 64);
    assert (CGRA_BYTE_OFFSET == 1);

    // Loop over each channel
    for (ch = 0; ch < C; ch++) begin
        // Calculate byte offset for current channel
        byte_offset = (ch % 4) << CGRA_BYTE_OFFSET; // 4 = BANK_DATA_WIDTH / WORD_WIDTH

        // Clear the last trunc_size rows for each channel
        for (x = 0; x < X; x++) begin
            for (int tr = Y - trunc_size; tr < Y; tr++) begin
                cur_addr = start_addr + ((tr * X * C + x * C + ch) << CGRA_BYTE_OFFSET); // Calculate the address for each element in the last trunc_size rows
                write_byte(cur_addr, zero_data, byte_offset); // Write zero data
            end
        end

        // Clear the last trunc_size columns for each channel
        for (y = 0; y < Y; y++) begin
            for (int tc = X - trunc_size; tc < X; tc++) begin
                cur_addr = start_addr + ((y * X * C + tc * C + ch) << CGRA_BYTE_OFFSET); // Calculate the address for each element in the last trunc_size columns
                write_byte(cur_addr, zero_data, byte_offset); // Write zero data
            end
        end
    end

    proc_lock.put(1); // Release lock
endtask

0 comments on commit 7ea9427

Please sign in to comment.