Skip to content

Commit

Permalink
add write 0s to bitstream for end-to-end; add sim func to write 0s to last rows and cols
Browse files Browse the repository at this point in the history
  • Loading branch information
yuchen-mei committed Sep 13, 2024
1 parent 0c58f38 commit 7ea9427
Show file tree
Hide file tree
Showing 4 changed files with 158 additions and 7 deletions.
33 changes: 30 additions & 3 deletions garnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -601,13 +601,40 @@ def fix_pond_flush_bug(self, placement, routing):
bitstream += self.interconnect.get_node_bitstream_config(source_node, dest_node)
return bitstream

def write_zero_to_config_regs(self, bitstream):
    """Append (addr, 0) writes that zero every named interconnect config register.

    Fix for the Onyx pond hardware: stale configuration left over from a
    previous layer can trigger random pond flushes, so all interconnect
    config registers are explicitly cleared before the real bitstream is
    programmed.

    Args:
        bitstream: list of (addr, data) tuples; extended in place.
    """
    # Imported lazily to avoid a hard dependency at module import time.
    from gemstone.common.configurable import ConfigRegister

    for loc, tile in self.interconnect.tile_circuits.items():
        # Tile-level address components are loop-invariant; compute once.
        tile_id = self.interconnect.get_tile_id(*loc)
        tile_id_width = tile.tile_id_width
        slice_start = tile.feature_config_slice.start
        # enumerate gives the feature's position directly instead of an
        # O(n) features().index(feature) lookup per feature.
        for feature_addr, feature in enumerate(tile.features()):
            # Only features with an instance name belong to the
            # interconnect and get zeroed here.
            if not feature.instance_name:
                continue
            for child in feature.children():
                if isinstance(child, ConfigRegister):
                    addr = (
                        tile_id
                        | (child.addr << slice_start)
                        | (feature_addr << tile_id_width)
                    )
                    bitstream.append((addr, 0))

def generate_bitstream(self, halide_src, placement, routing, id_to_name, instance_to_instr, netlist, bus,
compact=False):
compact=False, end_to_end=True):
routing_fix = archipelago.power.reduce_switching(routing, self.interconnect,
compact=compact)
routing.update(routing_fix)

bitstream = []
if end_to_end: self.write_zero_to_config_regs(bitstream)
bitstream += self.interconnect.get_route_bitstream(routing)
bitstream += self.fix_pond_flush_bug(placement, routing)
bitstream += self.get_placement_bitstream(placement, id_to_name,
Expand All @@ -616,7 +643,7 @@ def generate_bitstream(self, halide_src, placement, routing, id_to_name, instanc
skip_addr = self.interconnect.get_skip_addr()
bitstream = compress_config_data(bitstream, skip_compression=skip_addr)
inputs, outputs = self.get_input_output(netlist)
input_interface, output_interface,\
input_interface, output_interface, \
(reset, valid, en) = self.get_io_interface(inputs,
outputs,
placement,
Expand Down
65 changes: 61 additions & 4 deletions global_buffer/io_placement.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,65 @@
import re
import os
import json


def atoi(text):
    """Return *text* as an int when it is all digits, otherwise unchanged."""
    if text.isdigit():
        return int(text)
    return text

def natural_keys(text):
return [ atoi(c) for c in re.split(r'(\d+)', text) ]

def natural_keys(text):
    """Sort key for human/natural ordering: digit runs compare numerically.

    Splits *text* on runs of digits and converts each run to an int, so
    e.g. "io2" sorts before "io10".
    """
    parts = re.split(r'(\d+)', text)
    return [int(part) if part.isdigit() else part for part in parts]

def parse_glb_bank_config(app_dir, id_to_name, inputs, outputs, valid, placement):
    """Place IO blocks at the GLB bank columns listed in glb_bank_config.json.

    The JSON file maps a substring of each block's name ("inputs"/"outputs"
    sections) to a list of bank x-coordinates; blocks are matched to
    coordinates in order of appearance. Each output also places its paired
    valid signal (valid[idx]) at the same coordinate.

    Args:
        app_dir: directory containing glb_bank_config.json.
        id_to_name: block id -> block name mapping.
        inputs: input block ids.
        outputs: output block ids.
        valid: valid-signal ids, parallel to ``outputs``.
        placement: block id -> (x, y) dict, updated in place.

    Returns:
        The updated ``placement`` dict.
    """
    with open(os.path.join(app_dir, "glb_bank_config.json"), "r") as f:
        glb_json = json.load(f)

    # --- inputs: match each block to the next coordinate of its type ---
    input_types = glb_json["inputs"].keys()
    inputs_dict = {input_type: [] for input_type in input_types}
    index_counters = {input_type: 0 for input_type in input_types}

    for input_blk_id in inputs:
        input_blk_name = id_to_name[input_blk_id]
        # First configured type whose name is a substring of the block name.
        type_name = next((t for t in input_types if t in input_blk_name), None)
        if type_name:
            dict_idx = index_counters[type_name]
            coordinate = (glb_json["inputs"][type_name][dict_idx], 0)
            inputs_dict[type_name].append({input_blk_id: coordinate})
            index_counters[type_name] += 1

    # --- outputs: each output also places its paired valid signal ---
    output_types = glb_json["outputs"].keys()
    outputs_dict = {output_type: [] for output_type in output_types}
    index_counters = {output_type: 0 for output_type in output_types}

    for idx, output_blk_id in enumerate(outputs):
        output_blk_name = id_to_name[output_blk_id]
        type_name = next((t for t in output_types if t in output_blk_name), None)
        if type_name:
            dict_idx = index_counters[type_name]
            coordinate = (glb_json["outputs"][type_name][dict_idx], 0)
            outputs_dict[type_name].append({output_blk_id: coordinate})
            outputs_dict[type_name].append({valid[idx]: coordinate})
            index_counters[type_name] += 1

    # Assert that all the inputs and outputs have been placed
    assert sum(len(coords) for coords in inputs_dict.values()) == len(inputs), "Inputs in glb_bank_config.json do not match the number of inputs in the design"
    # Each output contributed two entries (data + valid), hence the // 2.
    assert sum(len(coords) for coords in outputs_dict.values()) // 2 == len(outputs), "Outputs in glb_bank_config.json do not match the number of outputs in the design"

    # Fold the collected coordinates into the placement dictionary.
    for coord_list in inputs_dict.values():
        for coord_dict in coord_list:
            placement.update(coord_dict)
    for coord_list in outputs_dict.values():
        for coord_dict in coord_list:
            placement.update(coord_dict)
    return placement

def place_io_blk(id_to_name, app_dir):
"""Hacky function to place the IO blocks"""
Expand All @@ -21,7 +74,7 @@ def place_io_blk(id_to_name, app_dir):
# Human sort thing from Kalhan used in GLB scripts
id_to_name_list.sort(key=lambda x: natural_keys(x[1]))

blks = [blk for (blk,_) in id_to_name_list]
blks = [blk for (blk, _) in id_to_name_list]

placement = {}
# find out all the IO blocks
Expand All @@ -31,7 +84,6 @@ def place_io_blk(id_to_name, app_dir):
assert blk_id not in ios
ios.append(blk_id)


# need to know if it's an input or output

reset = None
Expand Down Expand Up @@ -92,4 +144,9 @@ def place_io_blk(id_to_name, app_dir):
for dat in data:
name, x, y = tuple(dat.split(" "))
placement[name] = (x.strip(), y.strip())

# parse the glb_bank_config.json to specify bank locations
if os.path.isfile(app_dir + "/glb_bank_config.json"):
placement = parse_glb_bank_config(app_dir, id_to_name, inputs, outputs, valid, placement)

return placement
11 changes: 11 additions & 0 deletions tests/test_app/tb/environment.sv
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,17 @@ task Environment::read_data(Kernel kernel);
// "Hierarchical reference to a structure array member connected to task ref-port is not supported"
// proc_drv.read_data(kernel.outputs[i].io_tiles[j].start_addr,
// kernel.outputs[i].io_tiles[j].io_block_data);

// $display("[%s] clear last rows and columns of output_%0d_block_%0d", kernel.name, i, j);
// // Note that C should be channel number per GLB tile
// proc_drv.clear_last_rows_and_columns(
// kernel.outputs[i].io_tiles[j].start_addr,
// 4,
// 56,
// 56,
// 1
// );

data_q = new[kernel.outputs[i].io_tiles[j].io_block_data.size()];
proc_drv.read_data(kernel.outputs[i].io_tiles[j].start_addr, data_q);
kernel.outputs[i].io_tiles[j].io_block_data = data_q;
Expand Down
56 changes: 56 additions & 0 deletions tests/test_app/tb/proc_driver.sv
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ class ProcDriver;
extern task write_data(int start_addr, data_array_t data_q);
extern task write(int addr, bit [BANK_DATA_WIDTH-1:0] data);
extern task read_data(int start_addr, ref data_array_t data_q);
extern task write_byte(int addr, bit [BANK_DATA_WIDTH-1:0] data, int byte_offset);
extern task clear_last_rows_and_columns(int start_addr, int C, int X, int Y, int trunc_size);
endclass

function ProcDriver::new(vProcIfcDriver vif, semaphore proc_lock);
Expand Down Expand Up @@ -103,3 +105,57 @@ task ProcDriver::read_data(int start_addr, ref data_array_t data_q);
repeat (10) @(vif.cbd);
proc_lock.put(1);
endtask

// Write a single 16-bit word of `data` into the 64-bit bank word at `addr`,
// using the write strobe so that only the two bytes at positions
// `byte_offset` and `byte_offset + 1` are modified; the other bytes of the
// bank word are left untouched. Drives the proc interface for one clocking
// event, then deasserts all signals.
task ProcDriver::write_byte(int addr, bit [BANK_DATA_WIDTH-1:0] data, int byte_offset);
    bit [7:0] wr_strb; // Strobe for byte-level control

    // Calculate the write strobe based on the byte offset
    wr_strb = 8'b0; // Clear all strobe bits
    wr_strb[byte_offset] = 1'b1; // Set strobe for the lower byte of the desired 16-bit word
    wr_strb[byte_offset + 1] = 1'b1; // Set strobe for the upper byte of the desired 16-bit word

    // Drive one strobed write through the clocking block.
    vif.cbd.wr_en <= 1'b1;
    vif.cbd.wr_strb <= wr_strb;
    vif.cbd.wr_addr <= addr;
    vif.cbd.wr_data <= data;
    @(vif.cbd);
    // Deassert everything so the bus returns to idle after the write.
    vif.cbd.wr_en <= 0;
    vif.cbd.wr_strb <= 0;
    vif.cbd.wr_addr <= 0;
    vif.cbd.wr_data <= 0;
endtask

// Zero out the last `trunc_size` rows and the last `trunc_size` columns of a
// C-channel X-by-Y image stored at `start_addr` in channel-interleaved order
// (address = (y * X * C + x * C + ch) words). Each element is cleared with a
// strobed byte write so neighboring channels packed in the same 64-bit bank
// word are preserved. Takes and releases the shared proc bus lock.
// NOTE(review): cells in the overlapping bottom-right corner are written
// twice (once by the row pass, once by the column pass) — harmless since
// both writes are zero.
task ProcDriver::clear_last_rows_and_columns(int start_addr, int C, int X, int Y, int trunc_size);
    bit [GLB_ADDR_WIDTH-1:0] cur_addr;
    bit [BANK_DATA_WIDTH-1:0] zero_data = 0;
    int ch, x, y;
    int byte_offset; // Byte offset for writing

    proc_lock.get(1); // serialize access to the proc bus
    // The strobe math below assumes a 64-bit bank word holding four 16-bit
    // CGRA words (CGRA_BYTE_OFFSET == 1, i.e. 2 bytes per word).
    assert (BANK_DATA_WIDTH == 64);
    assert (CGRA_BYTE_OFFSET == 1);

    // Loop over each channel
    for (ch = 0; ch < C; ch++) begin
        // Calculate byte offset for current channel
        byte_offset = (ch % 4) << CGRA_BYTE_OFFSET; // 4 = BANK_DATA_WIDTH / WORD_WIDTH

        // Clear the last trunc_size rows for each channel
        for (x = 0; x < X; x++) begin
            for (int tr = Y - trunc_size; tr < Y; tr++) begin
                cur_addr = start_addr + ((tr * X * C + x * C + ch) << CGRA_BYTE_OFFSET); // Calculate the address for each element in the last trunc_size rows
                write_byte(cur_addr, zero_data, byte_offset); // Write zero data
            end
        end

        // Clear the last trunc_size columns for each channel
        for (y = 0; y < Y; y++) begin
            for (int tc = X - trunc_size; tc < X; tc++) begin
                cur_addr = start_addr + ((y * X * C + tc * C + ch) << CGRA_BYTE_OFFSET); // Calculate the address for each element in the last trunc_size columns
                write_byte(cur_addr, zero_data, byte_offset); // Write zero data
            end
        end
    end

    proc_lock.put(1); // Release lock
endtask

0 comments on commit 7ea9427

Please sign in to comment.