From 6fc20a7e7dacd209963c41cac5c86dc939777c4f Mon Sep 17 00:00:00 2001 From: steveri Date: Fri, 11 Nov 2022 16:08:31 -0800 Subject: [PATCH 01/63] creeping toward commonality --- .buildkite/pipelines/check_pe_area.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/pipelines/check_pe_area.sh b/.buildkite/pipelines/check_pe_area.sh index f712204ffb..85f51a658a 100755 --- a/.buildkite/pipelines/check_pe_area.sh +++ b/.buildkite/pipelines/check_pe_area.sh @@ -19,7 +19,7 @@ echo $pe_dir # "echo" to unglob why not resdir=`echo $pe_dir/*synthesis/results_syn` -egrep ^Tile_PE $resdir/final_area.rpt | awk -v max_area=11000 ' +egrep ^Tile_PE $resdir/final_area.rpt | awk -v max_area=8500 ' { printf("Total area: %d\n", $NF); if ($NF > max_area) { print "" From 393469c8441989246872b1b9533bed6a27ee51e2 Mon Sep 17 00:00:00 2001 From: steveri Date: Sat, 12 Nov 2022 16:22:52 -0800 Subject: [PATCH 02/63] creeping toward a common merged master branch --- .buildkite/pipelines/pmg.yml | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/.buildkite/pipelines/pmg.yml b/.buildkite/pipelines/pmg.yml index 86ad2f6832..426261f0a2 100644 --- a/.buildkite/pipelines/pmg.yml +++ b/.buildkite/pipelines/pmg.yml @@ -1,28 +1,30 @@ -# Agents are specified in "pipeline settings" now, see e.g. buildkite.com/tapeout-aha/steps -# agents: { jobsize: "hours" } - -# 04/2022 Now running tests in parallel -# Old runtime 30-35m, new runtime 20-25m +# Agents are specified in "pipeline settings", see e.g. 
buildkite.com/tapeout-aha/steps ############################################################################## -# Use this to test a specific branch/commit: +# TO TEST A SPECIFIC BRANCH/COMMIT +# ------------------------------------------------- # Add to env: # NOV11: ee214ef77b827f969e4b5f056f5d866cf391be7a # Add to commands: -# - pwd; git branch; git checkout $$NOV11 +# - pwd; git branch; git checkout $$NOV11 +# ------------------------------------------------- -############################################################################## -# Note: "echo exit 13" prevents hang at genus/innovus prompt env: SETUP: source mflowgen/bin/setup-buildkite.sh # Env var used by test_module.sh :( TEST_MODULE_SBFLAGS: '--skip_mflowgen' - # For debugging, use $BUILD flag to build in indicated local dir TEST: echo exit 13 | mflowgen/test/test_module.sh -# TEST: echo exit 13 | mflowgen/test/test_module.sh --build_dir /build/pmg${BUILDKITE_BUILD_NUMBER} + # For debugging, use build_dir flag to save build in indicated local dir + # TEST: echo exit 13 | mflowgen/test/test_module.sh --build_dir /build/pmg${BUILDKITE_BUILD_NUMBER} + + # Newer dockers use lake "sparse_strawman" branch offshoots :( + # Note as of 10/10/2022 this is the default (see common/rtl/gen_rtl.sh) + RTL_DOCKER_IMAGE: 'stanfordaha/garnet@sha256:dd688c7b98b034dadea9f7177781c97a7a030d737ae4751a78dbb97ae8b72af4' + + # TODO/FIXME Figure out how to use a top-level parm file :( steps: @@ -42,10 +44,11 @@ steps: # INDIVIDUAL TILE RUNS # Builds in dir e.g. 
mflowgen/full_chip/19-tile_array/16-Tile_MemCore +# Set pe max width to 112: fp limits to 110 but then lvs says 112 OK - label: 'ptile init 20m' commands: - $TEST --need_space 30G full_chip tile_array Tile_PE --steps init --debug - - .buildkite/pipelines/check_tile_width.sh Tile_PE --max 110 + - .buildkite/pipelines/check_tile_width.sh Tile_PE --max 112 - mflowgen/bin/buildcheck.sh full_chip/*tile_array/*Tile_PE --show-all-errors - label: 'mtile init 25m' From c19381148c571888d8cb8a1431cb159cd69fcece Mon Sep 17 00:00:00 2001 From: steveri Date: Sat, 12 Nov 2022 16:43:03 -0800 Subject: [PATCH 03/63] converging on a single branch -- inconsequential bk pipes --- .buildkite/pipeline_fullchip.yml | 3 ++ .buildkite/pipeline_mflowgen.yml | 2 +- .buildkite/pipelines/check_tile_width.sh | 2 +- .buildkite/pipelines/glb_tile.yml | 2 - .buildkite/pipelines/glb_tile_only.yml | 44 ++++++++++++++++++++++ .buildkite/pipelines/mem_tile_only.yml | 15 +++++--- .buildkite/pipelines/pe_synth_only.yml | 3 +- .buildkite/pipelines/pe_tile_only.yml | 11 ++++-- .buildkite/pipelines/pmtile_synth_only.yml | 3 +- 9 files changed, 69 insertions(+), 16 deletions(-) create mode 100644 .buildkite/pipelines/glb_tile_only.yml diff --git a/.buildkite/pipeline_fullchip.yml b/.buildkite/pipeline_fullchip.yml index 7ccb3febe7..4295540580 100644 --- a/.buildkite/pipeline_fullchip.yml +++ b/.buildkite/pipeline_fullchip.yml @@ -21,6 +21,9 @@ env: # Can use this to change target mflowgen branch # OVERRIDE_MFLOWGEN_BRANCH: glob-prob + # Newer dockers use lake "sparse_strawman" branch offshoots :( + RTL_DOCKER_IMAGE: 'stanfordaha/garnet@sha256:dd688c7b98b034dadea9f7177781c97a7a030d737ae4751a78dbb97ae8b72af4' + steps: - label: 'setup' commands: diff --git a/.buildkite/pipeline_mflowgen.yml b/.buildkite/pipeline_mflowgen.yml index 481824ef13..de162a5e61 120000 --- a/.buildkite/pipeline_mflowgen.yml +++ b/.buildkite/pipeline_mflowgen.yml @@ -1 +1 @@ -pipelines/pmg.yml \ No newline at end of file 
+pipelines/pe_tile_only.yml \ No newline at end of file diff --git a/.buildkite/pipelines/check_tile_width.sh b/.buildkite/pipelines/check_tile_width.sh index bb44669528..c12e5692b3 100755 --- a/.buildkite/pipelines/check_tile_width.sh +++ b/.buildkite/pipelines/check_tile_width.sh @@ -6,7 +6,7 @@ # Also used in per-checkin CI test e.g. pmg.yml: # commands: # - $TEST --need_space 30G full_chip tile_array Tile_PE --steps init --debug -# - .buildkite/pipelines/check_pe_area.sh Tile_PE --max 110 +# - .buildkite/pipelines/check_pe_area.sh Tile_PE --max 112 function usage { cat < Date: Sat, 12 Nov 2022 17:12:36 -0800 Subject: [PATCH 04/63] restore original mflowgen tests --- .buildkite/pipeline_mflowgen.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/pipeline_mflowgen.yml b/.buildkite/pipeline_mflowgen.yml index de162a5e61..481824ef13 120000 --- a/.buildkite/pipeline_mflowgen.yml +++ b/.buildkite/pipeline_mflowgen.yml @@ -1 +1 @@ -pipelines/pe_tile_only.yml \ No newline at end of file +pipelines/pmg.yml \ No newline at end of file From d472df2c76846c52552035a0c84a8d4ba8f2063c Mon Sep 17 00:00:00 2001 From: steveri Date: Sun, 13 Nov 2022 12:15:46 -0800 Subject: [PATCH 05/63] first test of conditional pipeline --- .buildkite/pipelines/pmg.yml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/.buildkite/pipelines/pmg.yml b/.buildkite/pipelines/pmg.yml index 426261f0a2..a7a67953cb 100644 --- a/.buildkite/pipelines/pmg.yml +++ b/.buildkite/pipelines/pmg.yml @@ -40,6 +40,12 @@ steps: ' - wait +# TODO run rtl *only* if branch name matches spv-merge-to-spv.* +# branch is $BUILDKITE_BRANCH maybe +# see buildkite.com/pipelines/conditionals +# add "if: build.branch ~ /to-spv/" +# and "if: build.branch !~ /to-spv/" + ############################################################################## # INDIVIDUAL TILE RUNS # Builds in dir e.g. 
mflowgen/full_chip/19-tile_array/16-Tile_MemCore @@ -50,15 +56,24 @@ steps: - $TEST --need_space 30G full_chip tile_array Tile_PE --steps init --debug - .buildkite/pipelines/check_tile_width.sh Tile_PE --max 112 - mflowgen/bin/buildcheck.sh full_chip/*tile_array/*Tile_PE --show-all-errors + if: build.branch !~ /to-spv/ - label: 'mtile init 25m' commands: - $TEST --need_space 30G full_chip tile_array Tile_MemCore --steps init --debug - .buildkite/pipelines/check_tile_width.sh Tile_MemCore --max 250 - mflowgen/bin/buildcheck.sh full_chip/*tile_array/*Tile_MemCore --show-all-errors + if: build.branch !~ /to-spv/ - label: 'gtile init 20m' commands: - $TEST --need_space 30G full_chip glb_top glb_tile --steps init --debug - mflowgen/bin/buildcheck.sh full_chip/*glb_top/*glb_tile --show-all-errors + if: build.branch !~ /to-spv/ + +# GF branches cannot run synthesis on my machine, so do RTL only I guess +- label: 'RTL only' + commands: + - $TEST --need_space 30G full_chip --steps rtl --debug + if: build.branch ~ /to-spv/ From 4849cb6f58a54f426ba7dfdcaa14a70daa9d7655 Mon Sep 17 00:00:00 2001 From: steveri Date: Sun, 13 Nov 2022 12:17:09 -0800 Subject: [PATCH 06/63] first test of conditional pipeline II --- .buildkite/pipelines/pmg.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/pipelines/pmg.yml b/.buildkite/pipelines/pmg.yml index a7a67953cb..9cd3df6087 100644 --- a/.buildkite/pipelines/pmg.yml +++ b/.buildkite/pipelines/pmg.yml @@ -75,5 +75,5 @@ steps: - label: 'RTL only' commands: - $TEST --need_space 30G full_chip --steps rtl --debug - if: build.branch ~ /to-spv/ + if: build.branch =~ /to-spv/ From c628fec252a4c4588bd49127d6e859e15b542b3a Mon Sep 17 00:00:00 2001 From: steveri Date: Sun, 13 Nov 2022 12:21:35 -0800 Subject: [PATCH 07/63] first test of conditional pipeline III --- .buildkite/pipelines/pmg.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.buildkite/pipelines/pmg.yml b/.buildkite/pipelines/pmg.yml 
index 9cd3df6087..281ed7bd78 100644 --- a/.buildkite/pipelines/pmg.yml +++ b/.buildkite/pipelines/pmg.yml @@ -74,6 +74,7 @@ steps: # GF branches cannot run synthesis on my machine, so do RTL only I guess - label: 'RTL only' commands: - - $TEST --need_space 30G full_chip --steps rtl --debug + - 'export RTL_DOCKER_IMAGE=stanfordaha/garnet:latest; + $TEST --need_space 30G full_chip --steps rtl --debug' if: build.branch =~ /to-spv/ From f1779f9ddde6ff9d574722ef17356157fa4e4b8f Mon Sep 17 00:00:00 2001 From: steveri Date: Sun, 13 Nov 2022 12:22:44 -0800 Subject: [PATCH 08/63] conditional pipeline IV: expand number of branch cases --- .buildkite/pipelines/pmg.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/pipelines/pmg.yml b/.buildkite/pipelines/pmg.yml index 281ed7bd78..c2d0338e4c 100644 --- a/.buildkite/pipelines/pmg.yml +++ b/.buildkite/pipelines/pmg.yml @@ -76,5 +76,5 @@ steps: commands: - 'export RTL_DOCKER_IMAGE=stanfordaha/garnet:latest; $TEST --need_space 30G full_chip --steps rtl --debug' - if: build.branch =~ /to-spv/ + if: build.branch =~ /to-spv/ || build.branch =~ /spV/ From cec7500764d49072ddf9aa90e00808d94c1b72e2 Mon Sep 17 00:00:00 2001 From: steveri Date: Mon, 14 Nov 2022 13:50:05 -0800 Subject: [PATCH 09/63] refined mflowgen test --- .buildkite/pipelines/pmg.yml | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/.buildkite/pipelines/pmg.yml b/.buildkite/pipelines/pmg.yml index c2d0338e4c..ad9efd7983 100644 --- a/.buildkite/pipelines/pmg.yml +++ b/.buildkite/pipelines/pmg.yml @@ -40,36 +40,34 @@ steps: ' - wait -# TODO run rtl *only* if branch name matches spv-merge-to-spv.* -# branch is $BUILDKITE_BRANCH maybe -# see buildkite.com/pipelines/conditionals -# add "if: build.branch ~ /to-spv/" -# and "if: build.branch !~ /to-spv/" - ############################################################################## # INDIVIDUAL TILE RUNS # Builds in dir e.g. 
mflowgen/full_chip/19-tile_array/16-Tile_MemCore +# For non-GF (TSMC) branches, run the usual three tile tests. +# But GF branches cannot run synthesis on my machine, so +# add if-condition to do a single RTL-only test for GF branches; + # Set pe max width to 112: fp limits to 110 but then lvs says 112 OK - label: 'ptile init 20m' commands: - $TEST --need_space 30G full_chip tile_array Tile_PE --steps init --debug - .buildkite/pipelines/check_tile_width.sh Tile_PE --max 112 - mflowgen/bin/buildcheck.sh full_chip/*tile_array/*Tile_PE --show-all-errors - if: build.branch !~ /to-spv/ + if: build.branch !~ /to-spv/ && build.branch !~ /spV/ - label: 'mtile init 25m' commands: - $TEST --need_space 30G full_chip tile_array Tile_MemCore --steps init --debug - .buildkite/pipelines/check_tile_width.sh Tile_MemCore --max 250 - mflowgen/bin/buildcheck.sh full_chip/*tile_array/*Tile_MemCore --show-all-errors - if: build.branch !~ /to-spv/ + if: build.branch !~ /to-spv/ && build.branch !~ /spV/ - label: 'gtile init 20m' commands: - $TEST --need_space 30G full_chip glb_top glb_tile --steps init --debug - mflowgen/bin/buildcheck.sh full_chip/*glb_top/*glb_tile --show-all-errors - if: build.branch !~ /to-spv/ + if: build.branch !~ /to-spv/ && build.branch !~ /spV/ # GF branches cannot run synthesis on my machine, so do RTL only I guess - label: 'RTL only' From 9698f2068040f287317e1ede3cf0d4fcdbc7fc2f Mon Sep 17 00:00:00 2001 From: steveri Date: Wed, 16 Nov 2022 07:25:55 -0800 Subject: [PATCH 10/63] (re)fixing the run_sim scripts --- mflowgen/common/cadence-xcelium-sim/run_sim.sh | 4 +++- mflowgen/common/synopsys-vcs-sim/run.sh | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/mflowgen/common/cadence-xcelium-sim/run_sim.sh b/mflowgen/common/cadence-xcelium-sim/run_sim.sh index 2472b2d568..706a5bd97c 100644 --- a/mflowgen/common/cadence-xcelium-sim/run_sim.sh +++ b/mflowgen/common/cadence-xcelium-sim/run_sim.sh @@ -16,7 +16,9 @@ fi # ADK for GLS if [ -d 
"inputs/adk" ]; then ARGS="$ARGS inputs/adk/stdcells.v" - ARGS="$ARGS inputs/adk/stdcells-prim.v" + if [ -f "inputs/adk/stdcells-prim.v" ]; then + ARGS="$ARGS inputs/adk/stdcells-prim.v" + fi if [ -f "inputs/adk/stdcells-lvt.v" ]; then ARGS="$ARGS inputs/adk/stdcells-lvt.v" fi diff --git a/mflowgen/common/synopsys-vcs-sim/run.sh b/mflowgen/common/synopsys-vcs-sim/run.sh index b59348e6ee..3ea4c046ab 100755 --- a/mflowgen/common/synopsys-vcs-sim/run.sh +++ b/mflowgen/common/synopsys-vcs-sim/run.sh @@ -21,7 +21,9 @@ fi # ADK for GLS if [ -d "inputs/adk" ]; then ARGS="$ARGS inputs/adk/stdcells.v" - ARGS="$ARGS inputs/adk/stdcells-prim.v" + if [ -f "inputs/adk/stdcells-prim.v" ]; then + ARGS="$ARGS inputs/adk/stdcells-prim.v" + fi if [ -f "inputs/adk/stdcells-lvt.v" ]; then ARGS="$ARGS inputs/adk/stdcells-lvt.v" fi From 142a47fbdcf7047ac4f99dc500d09ad54c32bab4 Mon Sep 17 00:00:00 2001 From: steveri Date: Wed, 16 Nov 2022 07:34:28 -0800 Subject: [PATCH 11/63] setup-buildkite: merging good changes from both branches --- mflowgen/bin/setup-buildkite.sh | 9 ++++++++- mflowgen/pad_frame/constraints/constraints.tcl | 5 +++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/mflowgen/bin/setup-buildkite.sh b/mflowgen/bin/setup-buildkite.sh index b3008fe9fd..65fc382b5c 100644 --- a/mflowgen/bin/setup-buildkite.sh +++ b/mflowgen/bin/setup-buildkite.sh @@ -403,6 +403,14 @@ fi echo "--- PIP INSTALL $mflowgen branch $mflowgen_branch"; date pushd $mflowgen git checkout $mflowgen_branch; git pull + + # Local modifications to repo can mean trouble! + if $(git diff | head | grep . > /dev/null); then + echo "+++ ERROR found local mods to mflowgen repo in $mflowgen_branch" + exit 13 + fi + + # Branch is pure, go ahead and install TOP=$PWD; pip install -e . 
popd @@ -419,7 +427,6 @@ fi which mflowgen; pip list | grep mflowgen - ######################################################################## # GARNET-PD: Installs garnet-pd package so to enable import # and reuse in mflowgen graph construction diff --git a/mflowgen/pad_frame/constraints/constraints.tcl b/mflowgen/pad_frame/constraints/constraints.tcl index 5eddcc33ea..7b07d7643a 100644 --- a/mflowgen/pad_frame/constraints/constraints.tcl +++ b/mflowgen/pad_frame/constraints/constraints.tcl @@ -52,6 +52,11 @@ set_max_fanout 20 $dc_design_name set_max_transition [expr 0.25*${dc_clock_period}] $dc_design_name +# sr 02/2020 +# haha IOPAD cells already have dont_touch property but not ANAIOPAD :( +# Without dont_touch, they disappear during dc-synthesis +set_dont_touch [ get_cells ANAIOPAD* ] + # sr 02/2020 # Arg turns out not all IOPAD cells have dont_touch property I guess set_dont_touch [ get_cells IOPAD* ] From c97dba32a94f27e49b8ba7e9d01d8509e4c68c48 Mon Sep 17 00:00:00 2001 From: steveri Date: Wed, 16 Nov 2022 07:42:34 -0800 Subject: [PATCH 12/63] merging gen_rtl --- mflowgen/common/rtl/gen_rtl.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/mflowgen/common/rtl/gen_rtl.sh b/mflowgen/common/rtl/gen_rtl.sh index 6eb1d360f5..5983c7a2a7 100755 --- a/mflowgen/common/rtl/gen_rtl.sh +++ b/mflowgen/common/rtl/gen_rtl.sh @@ -58,6 +58,15 @@ else if [ "$rtl_docker_image" == "" ]; then rtl_docker_image=${default_image}; fi if [ "$rtl_docker_image" == "default" ]; then rtl_docker_image=${default_image}; fi + # Env var overrides all else + if [ "$RTL_DOCKER_IMAGE" ]; then + echo "+++ WARNING overriding local rtl_docker_image w env var RTL_DOCKER_IMAGE" + echo "WAS $rtl_docker_image" + rtl_docker_image=${RTL_DOCKER_IMAGE} + echo "NOW $rtl_docker_image" + echo "--- continue..." + fi + # To use a docker image with name other than "latest" can do e.g. 
# rtl_docker_image="stanfordaha/garnet:cst" From 77a9d978fe8efe114e50f7a1a354a77cf9da6d96 Mon Sep 17 00:00:00 2001 From: steveri Date: Wed, 16 Nov 2022 08:34:31 -0800 Subject: [PATCH 13/63] oops not reday for ANAIOPADs yet --- mflowgen/pad_frame/constraints/constraints.tcl | 5 ----- 1 file changed, 5 deletions(-) diff --git a/mflowgen/pad_frame/constraints/constraints.tcl b/mflowgen/pad_frame/constraints/constraints.tcl index 7b07d7643a..5eddcc33ea 100644 --- a/mflowgen/pad_frame/constraints/constraints.tcl +++ b/mflowgen/pad_frame/constraints/constraints.tcl @@ -52,11 +52,6 @@ set_max_fanout 20 $dc_design_name set_max_transition [expr 0.25*${dc_clock_period}] $dc_design_name -# sr 02/2020 -# haha IOPAD cells already have dont_touch property but not ANAIOPAD :( -# Without dont_touch, they disappear during dc-synthesis -set_dont_touch [ get_cells ANAIOPAD* ] - # sr 02/2020 # Arg turns out not all IOPAD cells have dont_touch property I guess set_dont_touch [ get_cells IOPAD* ] From 8c4cfa71a777e9af2cdba49d6e58c85c7b627e2f Mon Sep 17 00:00:00 2001 From: steveri Date: Thu, 17 Nov 2022 09:26:11 -0800 Subject: [PATCH 14/63] added a couple more files from tsmc branch --- mflowgen/bin/buildcheck.sh | 71 ++++++++++++++++++++- mflowgen/bin/freq-slack.sh | 124 +++++++++++++++++++++++++++++++++++++ 2 files changed, 192 insertions(+), 3 deletions(-) create mode 100755 mflowgen/bin/freq-slack.sh diff --git a/mflowgen/bin/buildcheck.sh b/mflowgen/bin/buildcheck.sh index 320739694b..ee09a478d7 100755 --- a/mflowgen/bin/buildcheck.sh +++ b/mflowgen/bin/buildcheck.sh @@ -20,6 +20,10 @@ Usage: $0 [ -slrtgeah ] -e,--err do_err -R, --retry do_qcheck -q, --qrc do_qcheck + -t --timing check timing (frequency and slack) + --freq check timing (frequency and slack) + --slack check timing (frequency and slack) + EOF @@ -34,7 +38,7 @@ DBG= # Process command-line args build_dirs=() opstring='' -ALL="sLrleRq" +ALL="sLrleRqt" show_all_errs=false while [ $# -gt 0 ] ; do @@ -52,6 +56,10 @@ while [ 
$# -gt 0 ] ; do --qrc*) opstring="${opstring}q" ;; # qrc check --QRC*) opstring="${opstring}q" ;; # qrc check + --tim*) opstring="${opstring}t" ;; # timing check, freq and slack + --freq) opstring="${opstring}t" ;; # timing check, freq and slack + --slack) opstring="${opstring}t" ;; # timing check, freq and slack + --all) opstring="${opstring}${ALL}"; ;; -a) opstring="${opstring}${ALL}"; ;; @@ -107,6 +115,7 @@ done [ "${options[e]}" ] && do_err=true [ "${options[R]}" ] && do_qcheck=true [ "${options[q]}" ] && do_qcheck=true +[ "${options[t]}" ] && do_timing=true if [ "$DBG" ]; then test "$do_sizes" == true && echo DO_SIZES @@ -115,6 +124,7 @@ if [ "$DBG" ]; then test "$do_logs" == true && echo DO_LOGS test "$do_err" == true && echo DO_ERR test "$do_qcheck" == true && echo DO_QCHECK + test "$do_timing" == true && echo DO_TIMING # exit fi @@ -231,6 +241,18 @@ if [ "$do_sizes" ]; then # E.g. "16-Tile_MemCore SIZE 243 BY 88 ; AREA 15645" printf "%-30s %s %4.0f %s %4.0f %s AREA %9.0f\n" $f1 $lef_size $signoff_area + # Look for size of e.g. "19-tile_array" or "17-tile_array" + # FATAL ERROR if tile array is bigger than the chip!!! + if expr "$f1" > /dev/null : '.*-tile_array$'; then + xdim=`echo "$lef_size" | awk '{printf("%d", $2)}'` + ydim=`echo "$lef_size" | awk '{printf("%d", $4)}'` + if [ $xdim -gt 4700 ]; then + msg="**ERROR/FATAL tile array x dimension > 4700 (overlaps pad frame!)" + FATAL=`printf "${FATAL}${msg}\n"` + echo $msg + fi + fi + done if [ "$found_lefs" != "True" ]; then echo " No lefs found"; fi fi @@ -296,6 +318,38 @@ if [ "$do_logs" ]; then # echo '--------------------------------------------------------------------------------' fi +######################################################################## +# Timing information (clock period / frequency and slack) via freq-slack.sh e.g. 
+# Module Target Frequency Slack +# ------------------------------------------------- +# GarnetSOC 1000 MHz 1.00 ns -3.670 +# +# global_controller 1000 MHz 1.00 ns -0.004 +# global_buffer 900 MHz 1.11 ns -3.136 +# glb_tile 900 MHz 1.11 ns -0.162 +# +# tile_array 909 MHz 1.10 ns -0.046 +# Tile_MemCore 909 MHz 1.10 ns -0.034 +# Tile_PE 909 MHz 1.10 ns -0.233 +# ------------------------------------------------- +# * Clock speed from *-signoff/results/*.pt.sdc +# * Slack from */reports/postroute_all.tarpt +if [ "$do_timing" ]; then + echo ''; echo "+++ TIMING" + + function where_this_script_lives { + scriptpath=$0 # E.g. "build_tarfile.sh" or "foo/bar/build_tarfile.sh" + scriptdir=${0%/*} # E.g. "build_tarfile.sh" or "foo/bar" + if test "$scriptdir" == "$scriptpath"; then scriptdir="."; fi + # scriptdir=`cd $scriptdir; pwd` + (cd $scriptdir; pwd) + } + script_home=`where_this_script_lives` + $script_home/freq-slack.sh . +fi + + + ######################################################################## # Check to see if we had to restart/retry anywhere b/c of QRC failures if [ "$do_qcheck" ]; then @@ -330,7 +384,7 @@ if [ "$do_err" ]; then # echo $errfiles function chop { cut -b 1-$1; } - for f in $errfiles; do ( + for f in $errfiles; do # Want only the lowest-level log file # e.g. "17-tile_array/17-Tile_PE/24-cadence-genus-genlib/logs/genus.log" @@ -352,7 +406,7 @@ if [ "$do_err" ]; then | grep -v 'Error Limit' \ | chop 80 | sort | uniq -c | sort -rn | head echo "" - ) done | $filter + done | $filter # find * -name \*.log -exec egrep '(^Error|^\*\*ERROR)' {} \; \ # | grep -v 'Error Limit' \ @@ -368,4 +422,15 @@ SEE ALL ERRORS (cut-n-paste): egrep -l "\$pat" \$L && egrep "\$pat" \$L && echo ''; done | less -S EOF + fi + +if [ "$FATAL" ]; then + echo "" + echo "+++ FATAL ERRORS FOUND!!!" 
+ echo "$FATAL" + exit 13 +fi + + + diff --git a/mflowgen/bin/freq-slack.sh b/mflowgen/bin/freq-slack.sh new file mode 100755 index 0000000000..ff403a88b8 --- /dev/null +++ b/mflowgen/bin/freq-slack.sh @@ -0,0 +1,124 @@ +#!/bin/bash + +# To test: +# ssh r7arm-aha; cd /build/gold; $0 + +# function find-frequency { +function find-clock-period { + c=`find * -path '*signoff/results/'$1'.pt.sdc' | head -1` + # echo Found constraint $c + if ! [ "$c" ]; then + # echo "- no info for $1 -" + return + fi + + + # Find clock period and frequency + ck=`egrep 'create_clock|period' $c | egrep 'ideal|MASTER'` + cp=`echo $ck | sed 's/.*period //' | awk '{print $1}'` + echo $cp + +# echo "" +} + +# Sample signoff hold summary +# +--------------------+---------+---------+---------+---------+---------+---------+---------+---------+ +# | Hold mode | all | default |All2Macro| In2Out | In2Reg |Macro2All| Reg2Out | Reg2Reg | +# +--------------------+---------+---------+---------+---------+---------+---------+---------+---------+ +# | WNS (ns):| 0.014 | 0.000 | 0.014 | 0.823 | 0.262 | 0.036 | 0.216 | N/A | +# | TNS (ns):| 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | N/A | +# | Violating Paths:| 0 | 0 | 0 | 0 | 0 | 0 | 0 | N/A | +# | All Paths:|2.06e+05 | 0 | 17152 | 32 | 22528 |1.66e+05 | 576 | N/A | +# +--------------------+---------+---------+---------+---------+---------+---------+---------+---------+ + +function get-slack { + # E.g. 
+ # ptsd = 19-tile_array/28-cadence-innovus-signoff/results/Interconnect.pt.sdc + # headhead = 19-tile_array/28-cadence-innovus-signoff/ + # slack_report = 19-tile_array/28-cadence-innovus-signoff/signoff_hold.summary + m=$1 + ptsd=`find * -path '*signoff/results/'$m'.pt.sdc' | head -1` + head=`echo $ptsd | sed 's,[^/]*$,,'` + headhead=`echo $head | sed 's,/[^/]*/$,,'` + slack_report=${headhead}/reports/signoff_hold.summary + + WNS=`grep WNS $slack_report | sed 's/|/ /' | awk '{print $3}'` + echo $WNS +} + +echo 'Module Target Frequency WNS => Actual Freq' +echo '-------------------------------------------------------------' +for design in \ + GarnetSOC_pad_frame \ + global_controller \ + linebreak \ + global_buffer \ + glb_tile \ + linebreak \ + Interconnect \ + Tile_MemCore \ + Tile_PE \ + ; do + + if [ "$design" == "linebreak" ]; then + echo ""; continue + fi + + # filename => module-name e.g. "Interconnect" => "tile_array" + mod=$design + [ "$mod" == "Interconnect" ] && mod="tile_array" + [ "$mod" == "GarnetSOC_pad_frame" ] && mod="GarnetSOC" + + # Clock period and frequency + cp=`find-clock-period $design` + freq=`echo 1000/$cp | bc` + printf "%-17s %4d MHz (%4.2fns) " $mod $freq $cp + + # Slack + WNS=`get-slack $design` + printf "%6s" $WNS + + # Actual freq + # echo "scale=3; 1000/($cp - $WNS)" + actual_freq=`echo "scale=3; 1000/($cp - $WNS)" | bc` + actual_freq=`echo "1000/($cp - $WNS)" | bc` + # echo " $actual_freq MHz" + printf " => %4d MHz" $actual_freq + + + echo "" + + + +done +echo '-------------------------------------------------' +echo ' * Clock speed from *-signoff/results/*.pt.sdc' +echo ' * Slack from */reports/postroute_all.tarpt' + +############################################################################## +exit +############################################################################## + +function get_reports { find * | grep postroute/reports/postroute_all.tarpt; } + +echo -n "tile_array slack "; get_slack Interconnect + + +# 
soc=`get_reports | grep SOC`; echo $soc + +function get-slack-postroute { + modname=$1 + reports=`find * | grep postroute/reports/postroute_all.tarpt` + for r in $reports; do + design=`awk '/Design/{print $NF; exit}' $r` + if [ "$design" == "$modname" ]; then + # echo Found report $r; + slack=`grep Slack $r | head -1 | awk '{print $NF}'` + # echo Found slack $slack + # echo $modname slack $slack + echo $slack + break + fi + done +} + From 0fb6db96c37c453c563afb24eb1b2d775dd11935 Mon Sep 17 00:00:00 2001 From: steveri Date: Thu, 17 Nov 2022 09:35:02 -0800 Subject: [PATCH 15/63] tsmc portion of clk_gate was wrong for some reason... --- global_buffer/design/clk_gate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/global_buffer/design/clk_gate.py b/global_buffer/design/clk_gate.py index a7478662f9..361701fa68 100644 --- a/global_buffer/design/clk_gate.py +++ b/global_buffer/design/clk_gate.py @@ -21,7 +21,7 @@ def add_clk_gate_cell(self): self.add_child(f"CG_CELL", CG(self._params), E=self.enable, - CLK=self.clk, + CP=self.clk, TE=const(0, 1), Q=self.gclk) elif self._params.process == "GF": From 10672ca8662a601da6373b306734f5f49f1dc4bd Mon Sep 17 00:00:00 2001 From: steveri Date: Thu, 17 Nov 2022 10:23:17 -0800 Subject: [PATCH 16/63] should work for both gf and tsmc --- mflowgen/common/pwr-aware-gls/run_sim.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mflowgen/common/pwr-aware-gls/run_sim.sh b/mflowgen/common/pwr-aware-gls/run_sim.sh index c6d1173fe6..2447d11462 100644 --- a/mflowgen/common/pwr-aware-gls/run_sim.sh +++ b/mflowgen/common/pwr-aware-gls/run_sim.sh @@ -7,7 +7,10 @@ ARGS="$ARGS -input cmd.tcl -ALLOWREDEFINITION" # ADK for GLS if [ -d "inputs/adk" ]; then - ARGS="$ARGS inputs/adk/*pwr*.v inputs/adk/stdcells-prim.v" + ARGS="$ARGS inputs/adk/*pwr*.v" + if [ -f "inputs/adk/stdcells-prim.v" ]; then + ARGS="$ARGS inputs/adk/stdcells-prim.v" + fi fi # Set-up testbench From 2b681f2a898d2158c3a49a496090d2be23ef17d3 
Mon Sep 17 00:00:00 2001 From: steveri Date: Thu, 17 Nov 2022 10:30:48 -0800 Subject: [PATCH 17/63] should work for both gf and tsmc --- .../common/power-domains/outputs/dc-dont-use-constraints.tcl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mflowgen/common/power-domains/outputs/dc-dont-use-constraints.tcl b/mflowgen/common/power-domains/outputs/dc-dont-use-constraints.tcl index 8b5091c3b6..040f8e5ae4 100644 --- a/mflowgen/common/power-domains/outputs/dc-dont-use-constraints.tcl +++ b/mflowgen/common/power-domains/outputs/dc-dont-use-constraints.tcl @@ -16,6 +16,6 @@ #**WARN: (IMPSP-270): Cannot find a legal location for MASTER CELL 'XOR4D0BWP16P90'. # Set don't use on these cells upfront in P&R as well as in synthesis -#set_dont_use [get_lib_cells {*/*XNR4D0BWP16P90* */*MUX2D1BWP16P90* */*XOR4D0BWP16P90* */*MUX2D0P75BWP16P90* */*CKLNQOPTBBD1BWP16P90* */*CKMUX2D4BWP16P90* */*CKMUX2D1BWP16P90* }] - +set du_cells [get_lib_cells -quiet {*/*XNR4D0BWP16P90* */*MUX2D1BWP16P90* */*XOR4D0BWP16P90* */*MUX2D0P75BWP16P90* */*CKLNQOPTBBD1BWP16P90* */*CKMUX2D4BWP16P90* */*CKMUX2D1BWP16P90* }] +if { $du_cells != "" } { set_dont_use $du_cells } From 3b6614782e857796eb961835c49efd8c79c85dbb Mon Sep 17 00:00:00 2001 From: steveri Date: Thu, 17 Nov 2022 10:38:31 -0800 Subject: [PATCH 18/63] if TSMC / elif GF .... 
--- global_buffer/design/glb_bank_sram_gen.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/global_buffer/design/glb_bank_sram_gen.py b/global_buffer/design/glb_bank_sram_gen.py index 1cfd672f81..d723bf3893 100644 --- a/global_buffer/design/glb_bank_sram_gen.py +++ b/global_buffer/design/glb_bank_sram_gen.py @@ -7,7 +7,14 @@ class GlbBankSramGen(Generator): def __init__(self, addr_width, _params: GlobalBufferParams): - super().__init__(f"glb_bank_sram_gen_{addr_width}") + + # FIXME this TSMC/GF fix might go away after the smoke + # clears, but for now it gets us closer to a common master + if _params.process == "TSMC": + super().__init__("glb_bank_sram_gen") + elif _params.process == "GF": + super().__init__(f"glb_bank_sram_gen_{addr_width}") + self._params = _params self.addr_width = addr_width self.sram_macro_width = self._params.sram_macro_word_size From fcb25b4785abe22a4dd1c44ce84b4131ba38c282 Mon Sep 17 00:00:00 2001 From: steveri Date: Thu, 17 Nov 2022 10:42:17 -0800 Subject: [PATCH 19/63] if TSMC / elif GF ....
--- global_buffer/design/glb_bank_sram_stub.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/global_buffer/design/glb_bank_sram_stub.py b/global_buffer/design/glb_bank_sram_stub.py index 0bfc93b738..77cd7c9d05 100644 --- a/global_buffer/design/glb_bank_sram_stub.py +++ b/global_buffer/design/glb_bank_sram_stub.py @@ -5,7 +5,14 @@ class GlbBankSramStub(Generator): def __init__(self, addr_width, data_width, _params: GlobalBufferParams): - super().__init__(f"glb_bank_sram_stub_{addr_width}_{data_width}") + + # FIXME this TSMC/GF fix might go away after the smoke + # clears, but for now it gets us closer to a common master + if _params.process == "TSMC": + super().__init__("glb_bank_sram_stub") + elif _params.process == "GF": + super().__init__(f"glb_bank_sram_stub_{addr_width}_{data_width}") + self._params = _params self.addr_width = addr_width self.data_width = data_width From b5f242014437b6f8cec1a7553e1d969f4d1301a8 Mon Sep 17 00:00:00 2001 From: steveri Date: Thu, 17 Nov 2022 12:17:36 -0800 Subject: [PATCH 20/63] cleaned up a file --- mflowgen/pad_frame/constraints/constraints.tcl | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/mflowgen/pad_frame/constraints/constraints.tcl b/mflowgen/pad_frame/constraints/constraints.tcl index 5eddcc33ea..f68a9ae297 100644 --- a/mflowgen/pad_frame/constraints/constraints.tcl +++ b/mflowgen/pad_frame/constraints/constraints.tcl @@ -53,6 +53,12 @@ set_max_fanout 20 $dc_design_name set_max_transition [expr 0.25*${dc_clock_period}] $dc_design_name # sr 02/2020 -# Arg turns out not all IOPAD cells have dont_touch property I guess -set_dont_touch [ get_cells IOPAD* ] +# haha IOPAD cells already have dont_touch property but not ANAIOPAD :( +# Without dont_touch, they disappear during dc-synthesis +set dt_cells [ get_cells -quiet ANAIOPAD* ] +if { $dt_cells != "" } { set_dont_touch $dt_cells } +# sr 02/2020 +# Arg turns out not all IOPAD cells have dont_touch property I
guess +set dt_cells [ get_cells -quiet IOPAD* ] +if { $dt_cells != "" } { set_dont_touch $dt_cells } From 6658c5d9b8753f3ad07de082fe51620fe80c3083 Mon Sep 17 00:00:00 2001 From: steveri Date: Thu, 17 Nov 2022 12:42:01 -0800 Subject: [PATCH 21/63] changed to match master-dense --- mflowgen/test/test_module.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mflowgen/test/test_module.sh b/mflowgen/test/test_module.sh index 2d00a4b70f..ced91710aa 100755 --- a/mflowgen/test/test_module.sh +++ b/mflowgen/test/test_module.sh @@ -36,7 +36,7 @@ modlist=() VERBOSE=false build_sequence='lvs,gls' build_dir=. -need_space=20G +need_space=100G while [ $# -gt 0 ] ; do case "$1" in -h|--help) help; exit; ;; From f3eec409342f2e27fc00a07cb9d44563b22d4910 Mon Sep 17 00:00:00 2001 From: steveri Date: Thu, 17 Nov 2022 12:45:28 -0800 Subject: [PATCH 22/63] streamout is different for tsmc --- mflowgen/common/streamout_no_uniquify.py | 22 ++++++++++++++++++++++ mflowgen/soc/construct.py | 5 +++++ 2 files changed, 27 insertions(+) create mode 100644 mflowgen/common/streamout_no_uniquify.py diff --git a/mflowgen/common/streamout_no_uniquify.py b/mflowgen/common/streamout_no_uniquify.py new file mode 100644 index 0000000000..29586c451e --- /dev/null +++ b/mflowgen/common/streamout_no_uniquify.py @@ -0,0 +1,22 @@ +def streamout_no_uniquify(iflow): + '''Remove uniquify flag from stream-out.tcl''' + + # Add removal instructions to iflow step list of 'mflowgen-run' commands + script = "innovus-foundation-flow/custom-scripts/stream-out.tcl" + remove_script = f''' + echo "--- BEGIN EGREGIOUS HACK to undo mflowgen default -uniquify" + echo -n "BEFORE: "; grep uniq {script} || echo okay + grep -v uniq {script} > tmp-stream-out + mv tmp-stream-out {script} + echo -n "AFTER: "; grep uniq {script} || echo okay + ''' + # Make a list + rs_list = remove_script.split("\n") + + # Reverse it, because of how "prepend" works, duh. 
+ rs_list.reverse() + + # Prepend the commands to the script + # for cmd in remove_script.split("\n"): + for cmd in rs_list: + if cmd.strip(): iflow.pre_extend_commands( [ cmd.strip() ] ) diff --git a/mflowgen/soc/construct.py b/mflowgen/soc/construct.py index 7f80cf2584..2774407dd9 100644 --- a/mflowgen/soc/construct.py +++ b/mflowgen/soc/construct.py @@ -126,6 +126,11 @@ def construct(): dc.extend_inputs( soc_rtl.all_outputs() ) dc.extend_inputs( read_design.all_outputs() ) + # TSMC needs streamout *without* the (new) default -uniquify flag + # This python method finds 'stream-out.tcl' and strips out that flag. + from common.streamout_no_uniquify import streamout_no_uniquify + if adk_name == "tsmc16": streamout_no_uniquify(iflow) + #----------------------------------------------------------------------- # Graph -- Add nodes #----------------------------------------------------------------------- From 6f32cc6106d16c4797a4039f4564f00f775af478 Mon Sep 17 00:00:00 2001 From: steveri Date: Thu, 17 Nov 2022 12:56:50 -0800 Subject: [PATCH 23/63] one branch to rule them all --- .../global_controller/get_global_controller_outputs.sh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/mflowgen/full_chip/global_controller/get_global_controller_outputs.sh b/mflowgen/full_chip/global_controller/get_global_controller_outputs.sh index eb1032ae50..731d4b7861 100644 --- a/mflowgen/full_chip/global_controller/get_global_controller_outputs.sh +++ b/mflowgen/full_chip/global_controller/get_global_controller_outputs.sh @@ -9,7 +9,14 @@ else fi mkdir -p outputs -cp -L *cadence-innovus-genlib/outputs/design.lib outputs/global_controller_tt.lib + +if [ -f *cadence-genus-genlib/outputs/design.lib ]; then + cp -L *cadence-genus-genlib/outputs/design.lib outputs/global_controller_tt.lib + +elif [ -f *cadence-innovus-genlib/outputs/design.lib ]; then + cp -L *cadence-innovus-genlib/outputs/design.lib outputs/global_controller_tt.lib +fi + cp -L 
*synopsys-dc-lib2db/outputs/design.db outputs/global_controller_tt.db cp -L *cadence-innovus-signoff/outputs/design.lef outputs/global_controller.lef cp -L *cadence-innovus-signoff/outputs/design.vcs.v outputs/global_controller.vcs.v From eea6ab88ff3cfa30884f8b4a7c40fdd987c779ef Mon Sep 17 00:00:00 2001 From: steveri Date: Thu, 17 Nov 2022 16:56:37 -0800 Subject: [PATCH 24/63] merging another file --- mflowgen/glb_top/sim-gl-compile/run_sim.sh | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/mflowgen/glb_top/sim-gl-compile/run_sim.sh b/mflowgen/glb_top/sim-gl-compile/run_sim.sh index e54e84f93c..5974d900f0 100644 --- a/mflowgen/glb_top/sim-gl-compile/run_sim.sh +++ b/mflowgen/glb_top/sim-gl-compile/run_sim.sh @@ -3,10 +3,20 @@ ln -s ./inputs/Makefile ln -s ./inputs/gls ln -s ./inputs/sim +# Assumes gf adk has 'stdcells-prim.v' and tsmc does not + if [ $PWR_AWARE = "True" ]; then + if [ -f inputs/adk/stdcells-prim.v ]; then NETLIST_FILES="inputs/adk/stdcells-pwr.v inputs/glb_tile_sram_pwr.v inputs/glb_tile.vcs.pg.v inputs/design.vcs.pg.v" + else + # FIXME: PM standard cells should be removed! 
+ NETLIST_FILES="inputs/adk/stdcells-pwr.v inputs/adk/stdcells-pm-pwr.v inputs/glb_tile_sram_pwr.v inputs/glb_tile.vcs.pg.v inputs/design.vcs.pg.v" + fi else + if [ -f inputs/adk/stdcells-prim.v ]; then NETLIST_FILES="inputs/glb_tile_sram.v inputs/adk/stdcells-prim.v inputs/adk/stdcells.v inputs/glb_tile.vcs.v inputs/design.vcs.v" + else + NETLIST_FILES="inputs/glb_tile_sram.v inputs/adk/stdcells.v inputs/adk/stdcells-pm.v inputs/glb_tile.vcs.v inputs/design.vcs.v" fi if [ $sdf = "True" ]; then From 53d275e0f51b12be7e6f1911e06772a238f3ed4f Mon Sep 17 00:00:00 2001 From: steveri Date: Tue, 29 Nov 2022 10:23:54 -0800 Subject: [PATCH 25/63] fixing a typo --- global_buffer/design/glb_bank_sram_stub.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/global_buffer/design/glb_bank_sram_stub.py b/global_buffer/design/glb_bank_sram_stub.py index 77cd7c9d05..a94f7e2b56 100644 --- a/global_buffer/design/glb_bank_sram_stub.py +++ b/global_buffer/design/glb_bank_sram_stub.py @@ -6,7 +6,7 @@ class GlbBankSramStub(Generator): def __init__(self, addr_width, data_width, _params: GlobalBufferParams): - # FIXME this TSMC/GF fix might goo away after the smoke + # FIXME this TSMC/GF fix might go away after the smoke # clears, but for now it gets us closer to a common master if self._params.process == "TSMC": super().__init__("glb_bank_sram_stub") From 07c2d614011342f39c5a12292aeb66e739cb17e6 Mon Sep 17 00:00:00 2001 From: steveri Date: Wed, 30 Nov 2022 08:02:14 -0800 Subject: [PATCH 26/63] Merged Tile_Memcore/contruct.py --- mflowgen/Tile_MemCore/construct.py | 92 ++++++++++++++++++++++-------- 1 file changed, 68 insertions(+), 24 deletions(-) diff --git a/mflowgen/Tile_MemCore/construct.py b/mflowgen/Tile_MemCore/construct.py index 97c2945859..a5fc4753ff 100644 --- a/mflowgen/Tile_MemCore/construct.py +++ b/mflowgen/Tile_MemCore/construct.py @@ -21,7 +21,7 @@ def construct(): # Parameters #----------------------------------------------------------------------- - 
adk_name = get_sys_adk() + adk_name = get_sys_adk() # E.g. 'gf12-adk' or 'tsmc16' adk_view = 'multivt' pwr_aware = True @@ -35,6 +35,13 @@ def construct(): if synth_power: pwr_aware = False + want_drc_pm = True + + # TSMC override(s) + if adk_name == 'tsmc16': + adk_view = 'multicorner-multivt' + want_drc_pm = False + if adk_name == 'tsmc16': read_hdl_defines = 'TSMC16' elif adk_name == 'gf12-adk': @@ -75,6 +82,14 @@ def construct(): 'drc_env_setup' : 'drcenv-block.sh' } + # TSMC overrides + if adk_name == 'tsmc16': parameters.update({ + 'corner' : "tt0p8v25c", + 'bc_corner' : "ffg0p88v125c", + 'hold_target_slack' : 0.015, + 'interconnect_only' : True, + }) + #----------------------------------------------------------------------- # Create nodes #----------------------------------------------------------------------- @@ -99,7 +114,8 @@ def construct(): testbench = Step( this_dir + '/../common/testbench' ) application = Step( this_dir + '/../common/application' ) lib2db = Step( this_dir + '/../common/synopsys-dc-lib2db' ) - drc_pm = Step( this_dir + '/../common/gf-mentor-calibre-drcplus-pm' ) + if want_drc_pm: + drc_pm = Step( this_dir + '/../common/gf-mentor-calibre-drcplus-pm' ) if synth_power: post_synth_power = Step( this_dir + '/../common/tile-post-synth-power' ) post_pnr_power = Step( this_dir + '/../common/tile-post-pnr-power' ) @@ -123,7 +139,11 @@ def construct(): postroute_hold = Step( 'cadence-innovus-postroute_hold', default=True ) signoff = Step( 'cadence-innovus-signoff', default=True ) pt_signoff = Step( 'synopsys-pt-timing-signoff', default=True ) - genlibdb = Step( 'synopsys-ptpx-genlibdb', default=True ) + if adk_name == 'gf12-adk': + genlibdb = Step( 'synopsys-ptpx-genlibdb', default=True ) + else: + genlibdb = Step( 'cadence-genus-genlib', default=True ) + if which("calibre") is not None: drc = Step( 'mentor-calibre-drc', default=True ) lvs = Step( 'mentor-calibre-lvs', default=True ) @@ -140,8 +160,14 @@ def construct(): # Add sram macro inputs 
to downstream nodes synth.extend_inputs( ['sram_tt.lib', 'sram.lef'] ) - pt_signoff.extend_inputs( ['sram_tt.db'] ) - genlibdb.extend_inputs( ['sram_tt.lib', 'sram_tt.db'] ) + + if adk_name == 'tsmc16': + #pt_signoff.extend_inputs( ['sram_tt.db'] ) + genlibdb.extend_inputs( ['sram_tt.lib'] ) + + elif adk_name == 'gf12-adk': + pt_signoff.extend_inputs( ['sram_tt.db'] ) + genlibdb.extend_inputs( ['sram_tt.lib', 'sram_tt.db'] ) # These steps need timing and lef info for srams @@ -180,6 +206,11 @@ def construct(): order.append( 'copy_sdc.tcl' ) synth.set_param( 'order', order ) + # TSMC needs streamout *without* the (new) default -uniquify flag + # This strips off the unwanted flag + from common.streamout_no_uniquify import streamout_no_uniquify + if adk_name == "tsmc16": streamout_no_uniquify(iflow) + # Power aware setup if pwr_aware: @@ -236,7 +267,6 @@ def construct(): g.add_step( genlibdb ) g.add_step( lib2db ) g.add_step( drc ) - g.add_step( drc_pm ) g.add_step( lvs ) g.add_step( custom_lvs ) g.add_step( debugcalibre ) @@ -271,7 +301,6 @@ def construct(): g.connect_by_name( adk, postroute_hold ) g.connect_by_name( adk, signoff ) g.connect_by_name( adk, drc ) - g.connect_by_name( adk, drc_pm ) g.connect_by_name( adk, lvs ) g.connect_by_name( gen_sram, synth ) @@ -288,7 +317,6 @@ def construct(): g.connect_by_name( gen_sram, genlibdb ) g.connect_by_name( gen_sram, pt_signoff ) g.connect_by_name( gen_sram, drc ) - g.connect_by_name( gen_sram, drc_pm ) g.connect_by_name( gen_sram, lvs ) g.connect_by_name( rtl, synth ) @@ -311,9 +339,6 @@ def construct(): g.connect_by_name( iflow, postroute ) g.connect_by_name( iflow, postroute_hold ) g.connect_by_name( iflow, signoff ) - # Need this because we're using innovus for lib generation - g.connect_by_name( iflow, genlibdb ) - g.connect_by_name( custom_init, init ) g.connect_by_name( custom_power, power ) @@ -328,10 +353,8 @@ def construct(): g.connect_by_name( postroute, postroute_hold ) g.connect_by_name( postroute_hold, 
signoff ) g.connect_by_name( signoff, drc ) - g.connect_by_name( signoff, drc_pm ) g.connect_by_name( signoff, lvs ) g.connect(signoff.o('design-merged.gds'), drc.i('design_merged.gds')) - g.connect(signoff.o('design-merged.gds'), drc_pm.i('design_merged.gds')) g.connect(signoff.o('design-merged.gds'), lvs.i('design_merged.gds')) g.connect_by_name( signoff, genlibdb ) @@ -360,7 +383,6 @@ def construct(): g.connect_by_name( iflow, debugcalibre ) g.connect_by_name( signoff, debugcalibre ) g.connect_by_name( drc, debugcalibre ) - g.connect_by_name( drc_pm, debugcalibre ) g.connect_by_name( lvs, debugcalibre ) # Pwr aware steps: @@ -380,6 +402,19 @@ def construct(): g.connect_by_name( signoff, pwr_aware_gls) #g.connect(power_domains.o('pd-globalnetconnect.tcl'), power.i('globalnetconnect.tcl')) + # New step, added for gf12 + if want_drc_pm: + g.add_step( drc_pm ) + g.connect_by_name( adk, drc_pm ) + g.connect_by_name( gen_sram, drc_pm ) + g.connect_by_name( signoff, drc_pm ) + g.connect(signoff.o('design-merged.gds'), drc_pm.i('design_merged.gds')) + g.connect_by_name( drc_pm, debugcalibre ) + + # Need this because gf12 uses innovus for lib generation + if adk_name == 'gf12-adk': + g.connect_by_name( iflow, genlibdb ) + #----------------------------------------------------------------------- # Parameterize #----------------------------------------------------------------------- @@ -428,16 +463,25 @@ def construct(): init.update_params( { 'order': order } ) # Adding new input for genlibdb node to run - order = genlibdb.get_param('order') # get the default script run order - extraction_idx = order.index( 'extract_model.tcl' ) # find extract_model.tcl - order.insert( extraction_idx, 'genlibdb-constraints.tcl' ) # add here - genlibdb.update_params( { 'order': order } ) - - # genlibdb -- Remove 'report-interface-timing.tcl' beacuse it takes - # very long and is not necessary - order = genlibdb.get_param('order') - order.remove( 'write-interface-timing.tcl' ) - 
genlibdb.update_params( { 'order': order } ) + + if adk_name == 'gf12-adk': + # gf12 uses synopsys-ptpx for genlib (default is cadence-genus) + order = genlibdb.get_param('order') # get the default script run order + extraction_idx = order.index( 'extract_model.tcl' ) # find extract_model.tcl + order.insert( extraction_idx, 'genlibdb-constraints.tcl' ) # add here + genlibdb.update_params( { 'order': order } ) + + # genlibdb -- Remove 'report-interface-timing.tcl' beacuse it takes + # very long and is not necessary + order = genlibdb.get_param('order') + order.remove( 'write-interface-timing.tcl' ) + genlibdb.update_params( { 'order': order } ) + + else: + order = genlibdb.get_param('order') # get the default script run order + read_idx = order.index( 'read_design.tcl' ) # find read_design.tcl + order.insert( read_idx + 1, 'genlibdb-constraints.tcl' ) # add here + genlibdb.update_params( { 'order': order } ) # Pwr aware steps: From fedae26a6bf9b8935856c79494a09493b8fc9649 Mon Sep 17 00:00:00 2001 From: steveri Date: Wed, 30 Nov 2022 10:08:55 -0800 Subject: [PATCH 27/63] Merged Tile_PE/contruct.py --- mflowgen/Tile_PE/construct.py | 111 ++++++++++++++++++++++++++-------- 1 file changed, 86 insertions(+), 25 deletions(-) diff --git a/mflowgen/Tile_PE/construct.py b/mflowgen/Tile_PE/construct.py index 616be474ad..91bae4ed2f 100644 --- a/mflowgen/Tile_PE/construct.py +++ b/mflowgen/Tile_PE/construct.py @@ -21,7 +21,7 @@ def construct(): # Parameters #----------------------------------------------------------------------- - adk_name = get_sys_adk() + adk_name = get_sys_adk() # E.g. 
'gf12-adk' or 'tsmc16' adk_view = 'multivt' pwr_aware = True @@ -39,6 +39,15 @@ def construct(): if synth_power: pwr_aware = False + want_drc_pm = True + want_custom_cts = True + + # TSMC override(s) + if adk_name == 'tsmc16': + adk_view = 'multicorner-multivt' + want_drc_pm = False + want_custom_cts = False + if adk_name == 'tsmc16': read_hdl_defines = 'TSMC16' elif adk_name == 'gf12-adk': @@ -70,6 +79,11 @@ def construct(): 'strip_path' : 'testbench/dut', 'drc_env_setup' : 'drcenv-block.sh' } + # TSMC overrides + if adk_name == 'tsmc16': parameters.update({ + 'interconnect_only' : True, + 'core_density_target': 0.63, + }) # User-level option to change clock frequency # E.g. 'export clock_period_PE="4.0"' to target 250MHz @@ -99,7 +113,8 @@ def construct(): custom_genus_scripts = Step( this_dir + '/custom-genus-scripts' ) custom_flowgen_setup = Step( this_dir + '/custom-flowgen-setup' ) custom_power = Step( this_dir + '/../common/custom-power-leaf' ) - custom_cts = Step( this_dir + '/custom-cts' ) + if want_custom_cts: + custom_cts = Step( this_dir + '/custom-cts' ) short_fix = Step( this_dir + '/../common/custom-short-fix' ) genlibdb_constraints = Step( this_dir + '/../common/custom-genlibdb-constraints' ) custom_timing_assert = Step( this_dir + '/../common/custom-timing-assert' ) @@ -107,7 +122,8 @@ def construct(): testbench = Step( this_dir + '/../common/testbench' ) application = Step( this_dir + '/../common/application' ) lib2db = Step( this_dir + '/../common/synopsys-dc-lib2db' ) - drc_pm = Step( this_dir + '/../common/gf-mentor-calibre-drcplus-pm' ) + if want_drc_pm: + drc_pm = Step( this_dir + '/../common/gf-mentor-calibre-drcplus-pm' ) if synth_power: post_synth_power = Step( this_dir + '/../common/tile-post-synth-power' ) post_pnr_power = Step( this_dir + '/../common/tile-post-pnr-power' ) @@ -132,7 +148,11 @@ def construct(): postroute = Step( 'cadence-innovus-postroute', default=True ) signoff = Step( 'cadence-innovus-signoff', default=True ) 
pt_signoff = Step( 'synopsys-pt-timing-signoff', default=True ) - genlibdb = Step( 'synopsys-ptpx-genlibdb', default=True ) + if adk_name == 'gf12-adk': + genlibdb = Step( 'synopsys-ptpx-genlibdb', default=True ) + else: + genlibdb = Step( 'cadence-genus-genlib', default=True ) + if which("calibre") is not None: drc = Step( 'mentor-calibre-drc', default=True ) lvs = Step( 'mentor-calibre-lvs', default=True ) @@ -149,7 +169,6 @@ def construct(): # Add extra input edges to innovus steps that need custom tweaks init.extend_inputs( custom_init.all_outputs() ) power.extend_inputs( custom_power.all_outputs() ) - cts.extend_inputs( custom_cts.all_outputs() ) genlibdb.extend_inputs( genlibdb_constraints.all_outputs() ) synth.extend_inputs( custom_genus_scripts.all_outputs() ) iflow.extend_inputs( custom_flowgen_setup.all_outputs() ) @@ -181,8 +200,16 @@ def construct(): 'pe-constraints-2.tcl', 'dc-dont-use-constraints.tcl']) + # Eventually want to extend this to GF as well...! + if adk_name == 'tsmc16': + synth.extend_inputs([ 'check-pdcr-address.sh' ]) + init.extend_inputs(['upf_Tile_PE.tcl', 'pe-load-upf.tcl', 'dont-touch-constraints.tcl', 'pe-pd-params.tcl', 'pd-aon-floorplan.tcl', 'add-endcaps-welltaps-setup.tcl', 'pd-add-endcaps-welltaps.tcl', 'add-power-switches.tcl', 'check-clamp-logic-structure.tcl']) + # Eventually want to extend this to GF as well...! + if adk_name == 'tsmc16': + init.extend_inputs([ 'check-pdcr-address.sh' ]) + # Need pe-pd-params for parm 'vdd_m3_stripe_sparsity' # pd-globalnetconnect, pe-pd-params come from 'power-domains' node power.extend_inputs(['pd-globalnetconnect.tcl', 'pe-pd-params.tcl'] ) @@ -195,9 +222,21 @@ def construct(): signoff.extend_inputs(['conn-aon-cells-vdd.tcl', 'pd-generate-lvs-netlist.tcl', 'check-clamp-logic-structure.tcl'] ) pwr_aware_gls.extend_inputs(['design.vcs.pg.v']) + # Eventually want to extend this to GF as well...! 
+ if adk_name == 'tsmc16': + # Fix and repair PowerDomainConfigReg when/if magma decides to renumber it :( + synth.pre_extend_commands( ['./inputs/check-pdcr-address.sh'] ) + init.pre_extend_commands( ['./inputs/check-pdcr-address.sh'] ) + pwr_aware_gls.pre_extend_commands( ['./assign-pdcr-address.sh'] ) + # Add short_fix script(s) to list of available postroute scripts postroute.extend_inputs( short_fix.all_outputs() ) + # TSMC needs streamout *without* the (new) default -uniquify flag + # This python method finds 'stream-out.tcl' and strips out that flag. + from common.streamout_no_uniquify import streamout_no_uniquify + if adk_name == "tsmc16": streamout_no_uniquify(iflow) + #----------------------------------------------------------------------- # Graph -- Add nodes #----------------------------------------------------------------------- @@ -215,7 +254,6 @@ def construct(): g.add_step( custom_init ) g.add_step( power ) g.add_step( custom_power ) - g.add_step( custom_cts ) g.add_step( place ) g.add_step( cts ) g.add_step( postcts_hold ) @@ -228,7 +266,6 @@ def construct(): g.add_step( genlibdb ) g.add_step( lib2db ) g.add_step( drc ) - g.add_step( drc_pm ) g.add_step( lvs ) g.add_step( debugcalibre ) @@ -262,7 +299,6 @@ def construct(): g.connect_by_name( adk, postroute ) g.connect_by_name( adk, signoff ) g.connect_by_name( adk, drc ) - g.connect_by_name( adk, drc_pm ) g.connect_by_name( adk, lvs ) g.connect_by_name( rtl, synth ) @@ -290,11 +326,9 @@ def construct(): g.connect_by_name( iflow, route ) g.connect_by_name( iflow, postroute ) g.connect_by_name( iflow, signoff ) - g.connect_by_name( iflow, genlibdb ) g.connect_by_name( custom_init, init ) g.connect_by_name( custom_power, power ) - g.connect_by_name( custom_cts, cts ) # Fetch short-fix script in prep for eventual use by postroute g.connect_by_name( short_fix, postroute ) @@ -308,10 +342,8 @@ def construct(): g.connect_by_name( postroute, signoff ) g.connect_by_name( signoff, drc ) - g.connect_by_name( 
signoff, drc_pm ) g.connect_by_name( signoff, lvs ) g.connect(signoff.o('design-merged.gds'), drc.i('design_merged.gds')) - g.connect(signoff.o('design-merged.gds'), drc_pm.i('design_merged.gds')) g.connect(signoff.o('design-merged.gds'), lvs.i('design_merged.gds')) g.connect_by_name( signoff, genlibdb ) @@ -337,7 +369,6 @@ def construct(): g.connect_by_name( synth, debugcalibre ) g.connect_by_name( iflow, debugcalibre ) g.connect_by_name( signoff, debugcalibre ) - g.connect_by_name( drc_pm, debugcalibre ) g.connect_by_name( drc, debugcalibre ) g.connect_by_name( lvs, debugcalibre ) @@ -356,6 +387,24 @@ def construct(): g.connect_by_name( signoff, pwr_aware_gls) #g.connect(power_domains.o('pd-globalnetconnect.tcl'), power.i('globalnetconnect.tcl')) + # New 'custom_cts' step added for gf12 + if want_custom_cts: + cts.extend_inputs( custom_cts.all_outputs() ) + g.add_step( custom_cts ) + g.connect_by_name( custom_cts, cts ) + + # New 'drc_pm' step, added for gf12 + if want_drc_pm: + g.add_step( drc_pm ) + g.connect_by_name( adk, drc_pm ) + g.connect_by_name( signoff, drc_pm ) + g.connect(signoff.o('design-merged.gds'), drc_pm.i('design_merged.gds')) + g.connect_by_name( drc_pm, debugcalibre ) + + # Need this because gf12 uses innovus for lib generation + if adk_name == 'gf12-adk': + g.connect_by_name( iflow, genlibdb ) + #----------------------------------------------------------------------- # Parameterize #----------------------------------------------------------------------- @@ -402,24 +451,36 @@ def construct(): init.update_params( { 'order': order } ) # Adding new input for genlibdb node to run - order = genlibdb.get_param('order') # get the default script run order - extract_idx = order.index( 'extract_model.tcl' ) # find extract_model.tcl - order.insert( extract_idx, 'genlibdb-constraints.tcl' ) # add here - genlibdb.update_params( { 'order': order } ) - - # genlibdb -- Remove 'report-interface-timing.tcl' beacuse it takes - # very long and is not necessary - 
order = genlibdb.get_param('order') - order.remove( 'write-interface-timing.tcl' ) - genlibdb.update_params( { 'order': order } ) + + if adk_name == 'gf12-adk': + # gf12 uses synopsys-ptpx for genlib (default is cadence-genus) + order = genlibdb.get_param('order') # get the default script run order + extraction_idx = order.index( 'extract_model.tcl' ) # find extract_model.tcl + order.insert( extraction_idx, 'genlibdb-constraints.tcl' ) # add here + genlibdb.update_params( { 'order': order } ) + + # genlibdb -- Remove 'report-interface-timing.tcl' beacuse it takes + # very long and is not necessary + order = genlibdb.get_param('order') + order.remove( 'write-interface-timing.tcl' ) + genlibdb.update_params( { 'order': order } ) + + else: + order = genlibdb.get_param('order') # get the default script run order + read_idx = order.index( 'read_design.tcl' ) # find read_design.tcl + order.insert( read_idx + 1, 'genlibdb-constraints.tcl' ) # add here + genlibdb.update_params( { 'order': order } ) + # Pwr aware steps: if pwr_aware: # init node order = init.get_param('order') read_idx = order.index( 'floorplan.tcl' ) # find floorplan.tcl - order.insert( read_idx + 1, 'pe-load-upf.tcl' ) # add here - order.insert( read_idx + 2, 'pe-pd-params.tcl' ) # add here + + # 09/2022 reordered to load params (pe-pd-params) before using params (pe-load-upf) + order.insert( read_idx + 1, 'pe-pd-params.tcl' ) # add here + order.insert( read_idx + 2, 'pe-load-upf.tcl' ) # add here order.insert( read_idx + 3, 'pd-aon-floorplan.tcl' ) # add here order.insert( read_idx + 4, 'add-endcaps-welltaps-setup.tcl' ) # add here order.insert( read_idx + 5, 'pd-add-endcaps-welltaps.tcl' ) # add here From 7c5e00076add8a62ddd3d59b1774250b237a7a8c Mon Sep 17 00:00:00 2001 From: steveri Date: Mon, 5 Dec 2022 08:27:14 -0800 Subject: [PATCH 28/63] merged fullchip construct.py --- mflowgen/full_chip/construct.py | 316 ++++++++++++++++++++++---------- 1 file changed, 224 insertions(+), 92 deletions(-) diff 
--git a/mflowgen/full_chip/construct.py b/mflowgen/full_chip/construct.py index 95046bf48d..ec10f6688f 100644 --- a/mflowgen/full_chip/construct.py +++ b/mflowgen/full_chip/construct.py @@ -44,8 +44,14 @@ def construct(): # Parameters #----------------------------------------------------------------------- - adk_name = get_sys_adk() + adk_name = get_sys_adk() # E.g. 'gf12-adk' or 'tsmc16' adk_view = 'multivt' + which_soc = 'onyx' + + # TSMC override(s) + if adk_name == 'tsmc16': + adk_view = 'view-standard' + which_soc = 'amber' if which("calibre") is not None: drc_rule_deck = 'calibre-drc-chip.rule' @@ -116,11 +122,26 @@ def construct(): 'cgra_apps' : ["tests/conv_1_2", "tests/conv_2_1"] } + # Note 'lvs_adk_view' not used by amber/tsmc if parameters['PWR_AWARE'] == True: parameters['lvs_adk_view'] = adk_view + '-pm' else: parameters['lvs_adk_view'] = adk_view + # TSMC overrides + if adk_name == 'tsmc16': parameters.update({ + 'clock_period' : 1.3, + 'include_sealring' : True, + 'num_words' : 2048, + 'corner' : "tt0p8v25c", + # Dragonphy + 'dragonphy_rdl_x' : '613.565u', + 'dragonphy_rdl_y' : '3901.872u', + 'hold_target_slack' : 0.060, + }) + + # 'sram_2' and 'guarding' are onyx/GF-only parameters (for now) + sram_2_params = { # SRAM macros 'num_words' : 32768, @@ -158,23 +179,28 @@ def construct(): custom_init = Step( this_dir + '/custom-init' ) custom_lvs = Step( this_dir + '/custom-lvs-rules' ) custom_power = Step( this_dir + '/../common/custom-power-chip' ) - custom_cts = Step( this_dir + '/custom-cts' ) + if which_soc == 'onyx': + custom_cts = Step( this_dir + '/custom-cts' ) init_fc = Step( this_dir + '/../common/init-fullchip' ) io_file = Step( this_dir + '/io_file' ) pre_route = Step( this_dir + '/pre-route' ) sealring = Step( this_dir + '/sealring' ) netlist_fixing = Step( this_dir + '/../common/fc-netlist-fixing' ) - drc_pm = Step( this_dir + '/../common/gf-mentor-calibre-drcplus-pm' ) - drc_dp = Step( this_dir + '/gf-drc-dp' ) - drc_mas = Step( this_dir 
+ '/../common/gf-mentor-calibre-drc-mas' ) + if which_soc == 'onyx': + drc_pm = Step( this_dir + '/../common/gf-mentor-calibre-drcplus-pm' ) + drc_dp = Step( this_dir + '/gf-drc-dp' ) + drc_mas = Step( this_dir + '/../common/gf-mentor-calibre-drc-mas' ) # Block-level designs tile_array = Step( this_dir + '/tile_array' ) glb_top = Step( this_dir + '/glb_top' ) global_controller = Step( this_dir + '/global_controller' ) - dragonphy = Step( this_dir + '/dragonphy' ) - xgcd = Step( this_dir + '/xgcd' ) + + if which_soc == 'amber': + dragonphy = Step( this_dir + '/dragonphy') + elif which_soc == 'onyx': + xgcd = Step( this_dir + '/xgcd' ) # CGRA simulation @@ -203,29 +229,33 @@ def construct(): postroute_hold = Step( 'cadence-innovus-postroute_hold', default=True ) signoff = Step( 'cadence-innovus-signoff', default=True ) pt_signoff = Step( 'synopsys-pt-timing-signoff', default=True ) + if which("calibre") is not None: drc = Step( 'mentor-calibre-drc', default=True ) lvs = Step( 'mentor-calibre-lvs', default=True ) - merge_gdr = Step( 'mentor-calibre-gdsmerge-child', default=True ) # GF has a different way of running fill if adk_name == 'gf12-adk': fill = Step (this_dir + '/../common/mentor-calibre-fill-gf' ) + merge_gdr = Step( 'mentor-calibre-gdsmerge-child', default=True ) else: fill = Step( 'mentor-calibre-fill', default=True ) + merge_rdl = Step( 'mentor-calibre-gdsmerge-child', default=True ) + merge_rdl.set_name('gdsmerge-dragonphy-rdl') + merge_fill = Step( 'mentor-calibre-gdsmerge-child', default=True ) else: - drc = Step( 'cadence-pegasus-drc', default=True ) - lvs = Step( 'cadence-pegasus-lvs', default=True ) - fill = Step( 'cadence-pegasus-fill', default=True ) - merge_fill = Step( 'cadence-pegasus-gdsmerge-child', default=True ) + assert False, 'Sorry! 
Removed cadence-pegasus option' + debugcalibre = Step( 'cadence-innovus-debug-calibre', default=True ) merge_fill.set_name('gdsmerge-fill') # Send in the clones - # Second sram_node because soc has 2 types of srams - gen_sram_2 = gen_sram.clone() - gen_sram_2.set_name( 'gen_sram_macro_2' ) + + if which_soc == 'onyx': + # Second sram_node because soc has 2 types of srams + gen_sram_2 = gen_sram.clone() + gen_sram_2.set_name( 'gen_sram_macro_2' ) # 'power' step now gets its own design-rule check power_drc = drc.clone() @@ -234,14 +264,15 @@ def construct(): # Antenna DRC Check antenna_drc = drc.clone() antenna_drc.set_name( 'antenna-drc' ) - - # Pre-Fill DRC Check - prefill_drc = drc.clone() - prefill_drc.set_name( 'pre-fill-drc' ) - - # Separate ADK for LVS so it has PM cells when needed - lvs_adk = adk.clone() - lvs_adk.set_name( 'lvs_adk' ) + + if which_soc == 'onyx': + # Pre-Fill DRC Check + prefill_drc = drc.clone() + prefill_drc.set_name( 'pre-fill-drc' ) + + # Separate ADK for LVS so it has PM cells when needed + lvs_adk = adk.clone() + lvs_adk.set_name( 'lvs_adk' ) # Add cgra tile macro inputs to downstream nodes @@ -249,14 +280,27 @@ def construct(): synth.extend_inputs( ['glb_top_tt.lib', 'glb_top.lef'] ) synth.extend_inputs( ['global_controller_tt.lib', 'global_controller.lef'] ) synth.extend_inputs( ['sram_tt.lib', 'sram.lef'] ) - synth.extend_inputs( ['sram_2_tt.lib', 'sram_2.lef'] ) - synth.extend_inputs( ['xgcd_tt.lib', 'xgcd.lef'] ) + + if which_soc == 'onyx': + synth.extend_inputs( ['sram_2_tt.lib', 'sram_2.lef'] ) + synth.extend_inputs( ['xgcd_tt.lib', 'xgcd.lef'] ) + + elif which_soc == 'amber': + # Exclude dragonphy_top from synth inputs to prevent + # floating dragonphy inputs from being tied to 0 + synth.extend_inputs( ['dragonphy_top.lef'] ) + pt_signoff.extend_inputs( ['tile_array_tt.db'] ) pt_signoff.extend_inputs( ['glb_top_tt.db'] ) pt_signoff.extend_inputs( ['global_controller_tt.db'] ) pt_signoff.extend_inputs( ['sram_tt.db'] ) - 
pt_signoff.extend_inputs( ['sram_2_tt.db'] ) - pt_signoff.extend_inputs( ['xgcd_tt.db'] ) + + if which_soc == 'onyx': + pt_signoff.extend_inputs( ['sram_2_tt.db'] ) + pt_signoff.extend_inputs( ['xgcd_tt.db'] ) + + elif which_soc == 'amber': + pt_signoff.extend_inputs( ['dragonphy_top_tt.db'] ) route.extend_inputs( ['pre-route.tcl'] ) signoff.extend_inputs( sealring.all_outputs() ) @@ -272,8 +316,14 @@ def construct(): step.extend_inputs( ['glb_top_tt.lib', 'glb_top.lef'] ) step.extend_inputs( ['global_controller_tt.lib', 'global_controller.lef'] ) step.extend_inputs( ['sram_tt.lib', 'sram.lef'] ) - step.extend_inputs( ['sram_2_tt.lib', 'sram_2.lef'] ) - step.extend_inputs( ['xgcd_tt.lib', 'xgcd.lef'] ) + + if which_soc == 'onyx': + step.extend_inputs( ['sram_2_tt.lib', 'sram_2.lef'] ) + step.extend_inputs( ['xgcd_tt.lib', 'xgcd.lef'] ) + + elif which_soc == 'amber': + step.extend_inputs( ['dragonphy_top_tt.lib', 'dragonphy_top.lef'] ) + step.extend_inputs( ['dragonphy_RDL.lef'] ) # Need all block gds's to merge into the final layout gdsmerge_nodes = [signoff, power] @@ -282,8 +332,14 @@ def construct(): node.extend_inputs( ['glb_top.gds'] ) node.extend_inputs( ['global_controller.gds'] ) node.extend_inputs( ['sram.gds'] ) - node.extend_inputs( ['sram_2.gds'] ) - node.extend_inputs( ['xgcd.gds'] ) + + if which_soc == 'onyx': + node.extend_inputs( ['sram_2.gds'] ) + node.extend_inputs( ['xgcd.gds'] ) + + elif which_soc == 'amber': + node.extend_inputs( ['dragonphy_top.gds'] ) + node.extend_inputs( ['dragonphy_RDL.gds'] ) # Need extracted spice files for both tile types to do LVS @@ -293,11 +349,17 @@ def construct(): lvs.extend_inputs( ['glb_top.sram.spi'] ) lvs.extend_inputs( ['global_controller.lvs.v'] ) lvs.extend_inputs( ['sram.spi'] ) - lvs.extend_inputs( ['sram_2.spi'] ) - lvs.extend_inputs( ['xgcd.lvs.v'] ) - lvs.extend_inputs( ['xgcd-255.lvs.v'] ) - lvs.extend_inputs( ['xgcd-1279.lvs.v'] ) - lvs.extend_inputs( ['ring_oscillator.lvs.v'] ) + + if which_soc == 
'onyx': + lvs.extend_inputs( ['sram_2.spi'] ) + lvs.extend_inputs( ['xgcd.lvs.v'] ) + lvs.extend_inputs( ['xgcd-255.lvs.v'] ) + lvs.extend_inputs( ['xgcd-1279.lvs.v'] ) + lvs.extend_inputs( ['ring_oscillator.lvs.v'] ) + + elif which_soc == 'amber': + lvs.extend_inputs( ['dragonphy_top.spi'] ) + lvs.extend_inputs( ['adk_lvs2'] ) # Add extra input edges to innovus steps that need custom tweaks @@ -305,7 +367,8 @@ def construct(): init.extend_inputs( custom_init.all_outputs() ) init.extend_inputs( init_fc.all_outputs() ) power.extend_inputs( custom_power.all_outputs() ) - cts.extend_inputs( custom_cts.all_outputs() ) + if which_soc == 'onyx': + cts.extend_inputs( custom_cts.all_outputs() ) synth.extend_inputs( soc_rtl.all_outputs() ) synth.extend_inputs( read_design.all_outputs() ) @@ -317,6 +380,12 @@ def construct(): rtl.extend_outputs( ['header'] ) rtl.extend_postconditions( ["assert File( 'outputs/header' ) "] ) + # TSMC needs streamout *without* the (new) default -uniquify flag + # This python method finds 'stream-out.tcl' and strips out that flag. 
+ if adk_name == "tsmc16": + from common.streamout_no_uniquify import streamout_no_uniquify + streamout_no_uniquify(iflow) + #----------------------------------------------------------------------- # Graph -- Add nodes #----------------------------------------------------------------------- @@ -325,12 +394,10 @@ def construct(): g.add_step( rtl ) g.add_step( soc_rtl ) g.add_step( gen_sram ) - g.add_step( gen_sram_2 ) g.add_step( tile_array ) g.add_step( glb_top ) g.add_step( global_controller ) g.add_step( dragonphy ) - g.add_step( xgcd ) g.add_step( constraints ) g.add_step( read_design ) g.add_step( synth ) @@ -341,7 +408,6 @@ def construct(): g.add_step( custom_init ) g.add_step( power ) g.add_step( custom_power ) - g.add_step( custom_cts ) g.add_step( place ) g.add_step( cts ) g.add_step( postcts_hold ) @@ -353,21 +419,13 @@ def construct(): g.add_step( netlist_fixing ) g.add_step( signoff ) g.add_step( pt_signoff ) - g.add_step( prefill_drc ) g.add_step( fill ) g.add_step( merge_fill ) - g.add_step( merge_gdr ) g.add_step( drc ) - g.add_step( drc_pm ) - g.add_step( drc_dp ) - g.add_step( drc_mas ) g.add_step( antenna_drc ) g.add_step( lvs ) g.add_step( custom_lvs ) g.add_step( debugcalibre ) - - # Different adk view for lvs - g.add_step( lvs_adk ) # Post-Power DRC check g.add_step( power_drc ) @@ -378,6 +436,24 @@ def construct(): g.add_step( cgra_rtl_sim_run ) # g.add_step( cgra_gl_sim_compile ) + # Onyx-specific nodes + if which_soc == 'onyx': + g.add_step( gen_sram_2 ) + g.add_step( xgcd ) + g.add_step( custom_cts ) + g.add_step( prefill_drc ) + g.add_step( merge_gdr ) + g.add_step( drc_pm ) + g.add_step( drc_dp ) + g.add_step( drc_mas ) + + # Different adk view for lvs + g.add_step( lvs_adk ) + + # Amber-specific nodes + if which_soc == 'amber': + g.add_step( merge_rdl ) + #----------------------------------------------------------------------- # Graph -- Add edges #----------------------------------------------------------------------- @@ -385,7 +461,6 @@ 
def construct(): # Connect by name g.connect_by_name( adk, gen_sram ) - g.connect_by_name( adk, gen_sram_2 ) g.connect_by_name( adk, synth ) g.connect_by_name( adk, iflow ) g.connect_by_name( adk, init ) @@ -397,17 +472,26 @@ def construct(): g.connect_by_name( adk, postroute ) g.connect_by_name( adk, postroute_hold ) g.connect_by_name( adk, signoff ) - g.connect_by_name( adk, prefill_drc ) g.connect_by_name( adk, fill ) g.connect_by_name( adk, merge_fill ) - g.connect_by_name( adk, merge_gdr ) g.connect_by_name( adk, drc ) - g.connect_by_name( adk, drc_pm ) - g.connect_by_name( adk, drc_dp ) - g.connect_by_name( adk, drc_mas ) g.connect_by_name( adk, antenna_drc ) - # Use lvs_adk so lvs has access to cells used in lower-level blocks - g.connect_by_name( lvs_adk, lvs ) + + # Onyx-specific connections + if which_soc == 'onyx': + g.connect_by_name( adk, gen_sram_2 ) + g.connect_by_name( adk, prefill_drc ) + g.connect_by_name( adk, merge_gdr ) + g.connect_by_name( adk, drc_pm ) + g.connect_by_name( adk, drc_dp ) + g.connect_by_name( adk, drc_mas ) + # Use lvs_adk so lvs has access to cells used in lower-level blocks + g.connect_by_name( lvs_adk, lvs ) + + # Amber-specific connections + if which_soc == 'amber': + g.connect_by_name( adk, merge_rdl ) + g.connect_by_name( adk, lvs ) # Post-Power DRC check g.connect_by_name( adk, power_drc ) @@ -423,7 +507,14 @@ def construct(): # All of the blocks within this hierarchical design # Skip these if we're doing soc_only if parameters['soc_only'] == False: - blocks = [tile_array, glb_top, global_controller, dragonphy, xgcd] + + # FIXME This seems wrong...why is dragonphy in there??? 
+ if which_soc == 'onyx': + blocks = [tile_array, glb_top, global_controller, dragonphy, xgcd] + + if which_soc == 'amber': + blocks = [tile_array, glb_top, global_controller, dragonphy] + for block in blocks: g.connect_by_name( block, synth ) g.connect_by_name( block, iflow ) @@ -472,15 +563,18 @@ def construct(): g.connect_by_name( custom_init, init ) g.connect_by_name( custom_lvs, lvs ) g.connect_by_name( custom_power, power ) - g.connect_by_name( custom_cts, cts ) - # Connect both gen_sram_macro nodes to all downstream nodes that + if which_soc == 'onyx': + g.connect_by_name( custom_cts, cts ) + + # Connect gen_sram_macro node(s) to all downstream nodes that # need them sram_nodes = [synth, iflow, init, power, place, cts, postcts_hold, route, postroute, postroute_hold, signoff, pt_signoff, drc, lvs] for node in sram_nodes: - g.connect_by_name( gen_sram, node ) + g.connect_by_name( gen_sram, node ) + if which_soc == 'onyx': for sram_output in gen_sram_2.all_outputs(): node_input = sram_output.replace('sram', 'sram_2') if node_input in node.all_inputs(): @@ -499,37 +593,50 @@ def construct(): g.connect_by_name( postroute, postroute_hold ) g.connect_by_name( postroute_hold, signoff ) g.connect_by_name( signoff, lvs ) - # Merge guardring gds into design - g.connect(signoff.o('design-merged.gds'), merge_gdr.i('design.gds')) - - # Send gds with sealring to drc, fill, and lvs - g.connect_by_name( merge_gdr, lvs ) - # Run pre-fill DRC after signoff - g.connect_by_name( merge_gdr, prefill_drc ) - # Run PM DRC after signoff - g.connect_by_name( merge_gdr, drc_pm ) - - # Run Fill on merged GDS - g.connect( merge_gdr.o('design_merged.gds'), fill.i('design.gds') ) + if which_soc == 'onyx': + # Merge guardring gds into design + g.connect(signoff.o('design-merged.gds'), merge_gdr.i('design.gds')) + + # Send gds with sealring to drc, fill, and lvs + g.connect_by_name( merge_gdr, lvs ) + # Run pre-fill DRC after signoff + g.connect_by_name( merge_gdr, prefill_drc ) + # Run PM 
DRC after signoff + g.connect_by_name( merge_gdr, drc_pm ) + + # For GF, Fill is already merged during fill step + + # Run Fill on merged GDS + g.connect( merge_gdr.o('design_merged.gds'), fill.i('design.gds') ) - # For GF, Fill is already merged during fill step - if adk_name == 'gf12-adk': # Connect fill directly to DRC steps g.connect( fill.o('fill.gds'), drc_dp.i('design_merged.gds') ) g.connect( fill.o('fill.gds'), antenna_drc.i('design_merged.gds') ) # Connect drc_dp output gds to final signoff drc g.connect_by_name( drc_dp, drc ) g.connect_by_name( drc_dp, drc_mas ) - else: + + if which_soc == 'amber': + g.connect(signoff.o('design-merged.gds'), drc.i('design_merged.gds')) + g.connect(signoff.o('design-merged.gds'), lvs.i('design_merged.gds')) + + # Skipping + g.connect( signoff.o('design-merged.gds'), merge_rdl.i('design.gds') ) + g.connect( dragonphy.o('dragonphy_RDL.gds'), merge_rdl.i('child.gds') ) + g.connect_by_name( merge_rdl, lvs ) + + # Run Fill on merged GDS + g.connect( merge_rdl.o('design_merged.gds'), fill.i('design.gds') ) + # Merge fill - g.connect( signoff.o('design-merged.gds'), merge_fill.i('design.gds') ) + g.connect( merge_rdl.o('design_merged.gds'), merge_fill.i('design.gds') ) g.connect( fill.o('fill.gds'), merge_fill.i('child.gds') ) # Run DRC on merged and filled gds g.connect_by_name( merge_fill, drc ) g.connect_by_name( merge_fill, antenna_drc ) - + g.connect_by_name( adk, pt_signoff ) g.connect_by_name( signoff, pt_signoff ) @@ -537,8 +644,10 @@ def construct(): g.connect_by_name( synth, debugcalibre ) g.connect_by_name( iflow, debugcalibre ) g.connect_by_name( signoff, debugcalibre ) - #g.connect_by_name( drc, debugcalibre ) - #g.connect_by_name( lvs, debugcalibre ) + + if which_soc == 'amber': + g.connect_by_name( drc, debugcalibre ) + g.connect_by_name( lvs, debugcalibre ) g.connect_by_name( pre_route, route ) g.connect_by_name( sealring, signoff ) @@ -555,12 +664,13 @@ def construct(): 
print(f'parameters["hold_target_slack"]={parameters["hold_target_slack"]}') g.update_params( parameters ) - # Provide different parameter set to second sram node, so it can actually - # generate a different sram - gen_sram_2.update_params( sram_2_params ) + if which_soc == 'onyx': + # Provide different parameter set to second sram node, so it can actually + # generate a different sram + gen_sram_2.update_params( sram_2_params ) - # LVS adk has separate view parameter - lvs_adk.update_params({ 'adk_view' : parameters['lvs_adk_view']}) + # LVS adk has separate view parameter + lvs_adk.update_params({ 'adk_view' : parameters['lvs_adk_view']}) # Since we are adding an additional input script to the generic Innovus # steps, we modify the order parameter for that node which determines @@ -573,7 +683,19 @@ def construct(): synth.update_params({'TLX_REV_DATA_LO_WIDTH' : parameters['TLX_REV_DATA_LO_WIDTH']}, True) init.update_params({'soc_only': parameters['soc_only']}, True) - init.update_params( + if which_soc == 'amber': init.update_params( + {'order': [ + 'main.tcl','quality-of-life.tcl', + 'stylus-compatibility-procs.tcl','floorplan.tcl','io-fillers.tcl', + 'alignment-cells.tcl', + 'analog-bumps/route-phy-bumps.tcl', + 'analog-bumps/bump-connect.tcl', + 'gen-bumps.tcl', 'check-bumps.tcl', 'route-bumps.tcl', + 'place-macros.tcl', 'dont-touch.tcl' + ]} + ) + + if which_soc == 'onyx': init.update_params( {'order': [ 'main.tcl','quality-of-life.tcl', 'stylus-compatibility-procs.tcl','floorplan.tcl','io-fillers.tcl', @@ -622,15 +744,25 @@ def construct(): order.insert( index, 'netlist-fixing.tcl' ) signoff.update_params( { 'order': order } ) + if which_soc == 'amber': + merge_rdl.update_params( {'coord_x': parameters['dragonphy_rdl_x'], 'coord_y': parameters['dragonphy_rdl_y'], 'flatten_child': True, + 'design_top_cell': parameters['design_name'], 'child_top_cell': 'dragonphy_RDL'} ) - merge_fill.update_params( {'design_top_cell': parameters['design_name'], 
'child_top_cell': f"{parameters['design_name']}_F16a"} ) - - # need to give coordinates for guardring - merge_gdr.update_params( guardring_params ) + merge_fill.update_params( {'design_top_cell': parameters['design_name'], 'child_top_cell': f"{parameters['design_name']}_F16a"} ) - # Antenna DRC node needs to use antenna rule deck - antenna_drc.update_params( { 'drc_rule_deck': parameters['antenna_drc_rule_deck'], - 'drc_env_setup': parameters['antenna_drc_env_setup'] } ) + # Antenna DRC node needs to use antenna rule deck + antenna_drc.update_params( { 'drc_rule_deck': parameters['antenna_drc_rule_deck'] } ) + + if which_soc == 'onyx': + + merge_fill.update_params( {'design_top_cell': parameters['design_name'], 'child_top_cell': f"{parameters['design_name']}_F16a"} ) + + # need to give coordinates for guardring + merge_gdr.update_params( guardring_params ) + + # Antenna DRC node needs to use antenna rule deck + antenna_drc.update_params( { 'drc_rule_deck': parameters['antenna_drc_rule_deck'], + 'drc_env_setup': parameters['antenna_drc_env_setup'] } ) # Power DRC node should use block level rule deck to improve runtimes and not report false errors power_drc.update_params( {'drc_rule_deck': parameters['power_drc_rule_deck'] } ) From 3646e061d895d3ab05940fc00804972a2156285f Mon Sep 17 00:00:00 2001 From: steveri Date: Thu, 8 Dec 2022 12:17:46 -0800 Subject: [PATCH 29/63] Merged mflowgen/pad_frame/construct.py --- mflowgen/pad_frame/construct.py | 47 ++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/mflowgen/pad_frame/construct.py b/mflowgen/pad_frame/construct.py index 45fe35a716..88f68ce291 100644 --- a/mflowgen/pad_frame/construct.py +++ b/mflowgen/pad_frame/construct.py @@ -17,9 +17,13 @@ def construct(): # Parameters #----------------------------------------------------------------------- - adk_name = get_sys_adk() + adk_name = get_sys_adk() # E.g. 
'gf12-adk' or 'tsmc16' adk_view = 'view-standard' + # TSMC override(s) + if adk_name == 'tsmc16': which_soc = 'amber' + else: which_soc = 'onyx' + parameters = { 'construct_path' : __file__, 'design_name' : 'GarnetSOC_pad_frame', @@ -54,6 +58,16 @@ def construct(): init_fullchip = Step( this_dir + '/../common/init-fullchip' ) netlist_fixing = Step( this_dir + '/../common/fc-netlist-fixing' ) + if which_soc == 'amber': + # Custom step 'pre-flowsetup' + # To get new lef cells e.g. 'icovl-cells.lef' into iflow, we gotta: + # - create new step 'pre_flowsetup' whose outputs are icovl cells + # -- link via "commands" group in pre-iflow/configure.yml + # - connect pre-flowsetup step to flowsetup (iflow) step + # - extend iflow inputs to include icovl cells + # - iflow "setup.tcl" automatically includes "inputs/*.lef" + pre_flowsetup = Step( this_dir + '/pre-flowsetup' ) + # More custom steps custom_power = Step( this_dir + '/../common/custom-power-chip' ) @@ -119,6 +133,22 @@ def construct(): # Ouch. iflow and everyone that connects to iflow must also include # the icovl/dtcd lefs I guess? + if which_soc == 'amber': + pre_flowsetup_followers = [ + # iflow, init, power, place, cts, postcts_hold, route, postroute, signoff + iflow, init # can we get away with this? + ] + for step in pre_flowsetup_followers: + step.extend_inputs( [ + "icovl-cells.lef", "dtcd-cells.lef", + "bumpcells.lef" + ] ) + + # TSMC needs streamout *without* the (new) default -uniquify flag + # This python method finds 'stream-out.tcl' and strips out that flag. 
+ if adk_name == "tsmc16": + from common.streamout_no_uniquify import streamout_no_uniquify + streamout_no_uniquify(iflow) #----------------------------------------------------------------------- # Graph -- Add nodes @@ -130,6 +160,8 @@ def construct(): g.add_step( constraints ) g.add_step( dc ) + if which_soc == 'amber': + g.add_step( pre_flowsetup ) g.add_step( iflow ) g.add_step( init_fullchip ) g.add_step( init ) @@ -157,6 +189,8 @@ def construct(): # Connect by name g.connect_by_name( adk, dc ) + if which_soc == 'amber': + g.connect_by_name( adk, pre_flowsetup ) g.connect_by_name( adk, iflow ) g.connect_by_name( adk, init ) g.connect_by_name( adk, init_fill ) @@ -181,6 +215,12 @@ def construct(): g.connect_by_name( dc, power ) g.connect_by_name( dc, place ) + if which_soc == 'amber': + # g.connect_by_name( pre_flowsetup, iflow ) + # iflow, init, power, place, cts, postcts_hold, route, postroute, signoff + for step in pre_flowsetup_followers: + g.connect_by_name( pre_flowsetup, step) + g.connect_by_name( iflow, init ) g.connect_by_name( iflow, power ) g.connect_by_name( iflow, place ) @@ -259,6 +299,11 @@ def construct(): ]} ) + if which_soc == 'amber': + index = init.index( 'alignment-cells.tcl') + init.insert( index+1, 'analog-bumps/route-phy-bumps.tcl' ) + init.insert( index+2, 'analog-bumps/bump-connect.tcl' ) + order = power.get_param('order') order.append( 'add-endcaps-welltaps.tcl' ) power.update_params( { 'order': order } ) From eaebea2f1ab7fe0bb347c93b0278e15607510dc1 Mon Sep 17 00:00:00 2001 From: steveri Date: Thu, 8 Dec 2022 13:52:11 -0800 Subject: [PATCH 30/63] Merged mflowgen/glb_tile/construct.py --- mflowgen/glb_tile/construct.py | 70 ++++++++++++++++++++++++++-------- 1 file changed, 54 insertions(+), 16 deletions(-) diff --git a/mflowgen/glb_tile/construct.py b/mflowgen/glb_tile/construct.py index fb1c9ae002..49ccce4d12 100644 --- a/mflowgen/glb_tile/construct.py +++ b/mflowgen/glb_tile/construct.py @@ -21,8 +21,14 @@ def construct(): # 
Parameters #----------------------------------------------------------------------- - adk_name = get_sys_adk() + adk_name = get_sys_adk() # E.g. 'gf12-adk' or 'tsmc16' adk_view = 'multivt' + which_soc = 'onyx' + + # TSMC override(s) + if adk_name == 'tsmc16': + adk_view = 'view-standard' + which_soc = 'amber' parameters = { 'construct_path' : __file__, @@ -51,6 +57,19 @@ def construct(): 'drc_env_setup': 'drcenv-block.sh' } + # TSMC overrides + if adk_name == 'tsmc16': parameters.update({ + 'clock_period' : 1.11, + 'bank_height' : 8, + 'num_words' : 2048, + 'corner' : "tt0p8v25c", + }) + + # OG TSMC did not set num_subarrays etc. + if adk_name == 'tsmc16': + parameters.pop('num_subarrays') + parameters.pop('drc_env_setup') + #----------------------------------------------------------------------- # Create nodes #----------------------------------------------------------------------- @@ -69,12 +88,13 @@ def construct(): gen_sram = Step( this_dir + '/../common/gen_sram_macro' ) custom_init = Step( this_dir + '/custom-init' ) custom_power = Step( this_dir + '/../common/custom-power-leaf' ) - custom_cts = Step( this_dir + '/../common/custom-cts' ) short_fix = Step( this_dir + '/../common/custom-short-fix' ) custom_lvs = Step( this_dir + '/custom-lvs-rules' ) genlib = Step( this_dir + '/../common/cadence-innovus-genlib' ) lib2db = Step( this_dir + '/../common/synopsys-dc-lib2db' ) - drc_pm = Step( this_dir + '/../common/gf-mentor-calibre-drcplus-pm' ) + if which_soc == 'onyx': + custom_cts = Step( this_dir + '/../common/custom-cts' ) + drc_pm = Step( this_dir + '/../common/gf-mentor-calibre-drcplus-pm' ) # Default steps @@ -110,7 +130,7 @@ def construct(): # Add sram macro inputs to downstream nodes - genlib.extend_inputs( ['sram_tt.db'] ) + if which_soc == 'onyx': genlib.extend_inputs( ['sram_tt.db'] ) pt_signoff.extend_inputs( ['sram_tt.db'] ) # These steps need timing and lef info for srams @@ -131,7 +151,8 @@ def construct(): init.extend_inputs( 
custom_init.all_outputs() ) power.extend_inputs( custom_power.all_outputs() ) - cts.extend_inputs( custom_cts.all_outputs() ) + if which_soc == 'onyx': + cts.extend_inputs( custom_cts.all_outputs() ) # Add header files to outputs rtl.extend_outputs( ['header'] ) @@ -143,6 +164,12 @@ def construct(): postroute_hold.extend_inputs( short_fix.all_outputs() ) + # TSMC needs streamout *without* the (new) default -uniquify flag + # This python script finds 'stream-out.tcl' and strips out that flag. + if adk_name == "tsmc16": + from common.streamout_no_uniquify import streamout_no_uniquify + streamout_no_uniquify(iflow) + #----------------------------------------------------------------------- # Graph -- Add nodes #----------------------------------------------------------------------- @@ -158,7 +185,8 @@ def construct(): g.add_step( power ) g.add_step( custom_power ) g.add_step( place ) - g.add_step( custom_cts ) + if which_soc == 'onyx': + g.add_step( custom_cts ) g.add_step( cts ) g.add_step( postcts_hold ) g.add_step( route ) @@ -170,7 +198,8 @@ def construct(): g.add_step( genlib ) g.add_step( lib2db ) g.add_step( drc ) - g.add_step( drc_pm ) + if which_soc == 'onyx': + g.add_step( drc_pm ) g.add_step( lvs ) g.add_step( custom_lvs ) g.add_step( debugcalibre ) @@ -194,7 +223,8 @@ def construct(): g.connect_by_name( adk, postroute_hold ) g.connect_by_name( adk, signoff ) g.connect_by_name( adk, drc ) - g.connect_by_name( adk, drc_pm ) + if which_soc == 'onyx': + g.connect_by_name( adk, drc_pm ) g.connect_by_name( adk, lvs ) g.connect_by_name( adk, genlib ) @@ -212,7 +242,8 @@ def construct(): g.connect_by_name( gen_sram, genlib ) g.connect_by_name( gen_sram, pt_signoff ) g.connect_by_name( gen_sram, drc ) - g.connect_by_name( gen_sram, drc_pm ) + if which_soc == 'onyx': + g.connect_by_name( gen_sram, drc_pm ) g.connect_by_name( gen_sram, lvs ) g.connect_by_name( rtl, synth ) @@ -240,7 +271,8 @@ def construct(): g.connect_by_name( custom_init, init ) g.connect_by_name( 
custom_power, power ) - g.connect_by_name( custom_cts, cts ) + if which_soc == 'onyx': + g.connect_by_name( custom_cts, cts ) g.connect_by_name( custom_lvs, lvs ) g.connect_by_name( init, power ) g.connect_by_name( power, place ) @@ -251,10 +283,12 @@ def construct(): g.connect_by_name( postroute, postroute_hold ) g.connect_by_name( postroute_hold, signoff ) g.connect_by_name( signoff, drc ) - g.connect_by_name( signoff, drc_pm ) + if which_soc == 'onyx': + g.connect_by_name( signoff, drc_pm ) g.connect_by_name( signoff, lvs ) g.connect(signoff.o('design-merged.gds'), drc.i('design_merged.gds')) - g.connect(signoff.o('design-merged.gds'), drc_pm.i('design_merged.gds')) + if which_soc == 'onyx': + g.connect(signoff.o('design-merged.gds'), drc_pm.i('design_merged.gds')) g.connect(signoff.o('design-merged.gds'), lvs.i('design_merged.gds')) g.connect_by_name( signoff, genlib ) @@ -269,8 +303,9 @@ def construct(): g.connect_by_name( synth, debugcalibre ) g.connect_by_name( iflow, debugcalibre ) g.connect_by_name( signoff, debugcalibre ) - g.connect_by_name( drc_pm, debugcalibre ) g.connect_by_name( drc, debugcalibre ) + if which_soc == 'onyx': + g.connect_by_name( drc_pm, debugcalibre ) g.connect_by_name( lvs, debugcalibre ) #----------------------------------------------------------------------- @@ -282,9 +317,12 @@ def construct(): g.update_params( parameters ) # Add bank height param to init - # number of banks is fixed to 2 - bank_height = (parameters['glb_tile_mem_size'] * 1024 // 2) // (parameters['num_words'] * (parameters['word_size'] // 8)) - init.update_params( { 'bank_height': bank_height }, True ) + if which_soc == 'onyx': + # number of banks is fixed to 2 + bank_height = (parameters['glb_tile_mem_size'] * 1024 // 2) // (parameters['num_words'] * (parameters['word_size'] // 8)) + init.update_params( { 'bank_height': bank_height }, True ) + else: + init.update_params( { 'bank_height': parameters['bank_height'] }, True ) # Change nthreads synth.update_params( 
{ 'nthreads': 4 } ) From 98c6749958371f89406343805041b9a8040e901f Mon Sep 17 00:00:00 2001 From: steveri Date: Fri, 9 Dec 2022 07:50:33 -0800 Subject: [PATCH 31/63] tweak mflowgen/full_chip/construct.py to match dense version --- mflowgen/full_chip/construct.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/mflowgen/full_chip/construct.py b/mflowgen/full_chip/construct.py index ec10f6688f..a11f7e38e2 100644 --- a/mflowgen/full_chip/construct.py +++ b/mflowgen/full_chip/construct.py @@ -138,7 +138,14 @@ def construct(): 'dragonphy_rdl_x' : '613.565u', 'dragonphy_rdl_y' : '3901.872u', 'hold_target_slack' : 0.060, + }) + # OG TSMC did not set use_local_garnet etc. + if adk_name == 'tsmc16': + parameters.pop('use_local_garnet') + parameters.pop('drc_env_setup') + parameters.pop('antenna_drc_env_setup') + parameters.pop('lvs_adk_view') # 'sram_2' and 'guarding' are onyx/GF-only parameters (for now) From e635f730897223ca13a99a4b5b142575087dd734 Mon Sep 17 00:00:00 2001 From: steveri Date: Fri, 9 Dec 2022 07:54:32 -0800 Subject: [PATCH 32/63] common mflowgen/tile_array/construct.py for amber, onyx --- mflowgen/tile_array/construct.py | 105 ++++++++++++++++++++++--------- 1 file changed, 75 insertions(+), 30 deletions(-) diff --git a/mflowgen/tile_array/construct.py b/mflowgen/tile_array/construct.py index cee85493b9..396842f7d0 100644 --- a/mflowgen/tile_array/construct.py +++ b/mflowgen/tile_array/construct.py @@ -31,6 +31,10 @@ def construct(): else: read_hdl_defines = '' + # TSMC override(s) + if adk_name == 'tsmc16': which_soc = 'amber' + else: which_soc = 'onyx' + parameters = { 'construct_path' : __file__, 'design_name' : 'Interconnect', @@ -67,6 +71,18 @@ def construct(): else: parameters['lvs_adk_view'] = adk_view + # TSMC overrides + if adk_name == 'tsmc16': parameters.update({ + 'clock_period' : 1.1, + 'hold_target_slack' : 0.015, + }) + + # OG TSMC did not set read_hdl_defines etc. 
+ if adk_name == 'tsmc16': + parameters.pop('read_hdl_defines') + parameters.pop('drc_env_setup') + parameters.pop('lvs_adk_view') + #----------------------------------------------------------------------- # Create nodes #----------------------------------------------------------------------- @@ -92,7 +108,8 @@ def construct(): gls_args = Step( this_dir + '/gls_args' ) testbench = Step( this_dir + '/testbench' ) lib2db = Step( this_dir + '/../common/synopsys-dc-lib2db' ) - drc_pm = Step( this_dir + '/../common/gf-mentor-calibre-drcplus-pm' ) + if which_soc == 'onyx': + drc_pm = Step( this_dir + '/../common/gf-mentor-calibre-drcplus-pm' ) # Default steps @@ -112,8 +129,13 @@ def construct(): postroute_hold = Step( 'cadence-innovus-postroute_hold', default=True ) signoff = Step( 'cadence-innovus-signoff', default=True ) pt_signoff = Step( 'synopsys-pt-timing-signoff', default=True ) - pt_genlibdb = Step( 'synopsys-ptpx-genlibdb', default=True ) - genlib = Step( 'cadence-innovus-genlib', default=True ) + if which_soc == 'onyx': + pt_genlibdb = Step( 'synopsys-ptpx-genlibdb', default=True ) + genlib = Step( 'cadence-innovus-genlib', default=True ) + else: + #genlibdb = Step( 'synopsys-ptpx-genlibdb', default=True ) + genlib = Step( 'cadence-genus-genlib', default=True ) + if which("calibre") is not None: drc = Step( 'mentor-calibre-drc', default=True ) lvs = Step( 'mentor-calibre-lvs', default=True ) @@ -124,8 +146,9 @@ def construct(): vcs_sim = Step( 'synopsys-vcs-sim', default=True ) # Separate ADK for LVS so it has PM cells when needed - lvs_adk = adk.clone() - lvs_adk.set_name( 'lvs_adk' ) + if which_soc == 'onyx': + lvs_adk = adk.clone() + lvs_adk.set_name( 'lvs_adk' ) # Add cgra tile macro inputs to downstream nodes @@ -135,10 +158,11 @@ def construct(): synth.extend_inputs( ['Tile_MemCore_tt.lib'] ) pt_signoff.extend_inputs( ['Tile_PE_tt.db'] ) pt_signoff.extend_inputs( ['Tile_MemCore_tt.db'] ) - pt_genlibdb.extend_inputs( ['Tile_PE_tt.db'] ) 
genlib.extend_inputs( ['Tile_PE_tt.lib'] ) - pt_genlibdb.extend_inputs( ['Tile_MemCore_tt.db'] ) genlib.extend_inputs( ['Tile_MemCore_tt.lib'] ) + if which_soc == 'onyx': + pt_genlibdb.extend_inputs( ['Tile_PE_tt.db'] ) + pt_genlibdb.extend_inputs( ['Tile_MemCore_tt.db'] ) e2e_apps = ["tests/conv_3_3", "apps/cascade", "apps/harris_auto", "apps/resnet_i1_o1_mem", "apps/resnet_i1_o1_pond"] @@ -215,6 +239,12 @@ def construct(): cts.extend_inputs( custom_cts.all_outputs() ) + # TSMC needs streamout *without* the (new) default -uniquify flag + # This python script finds 'stream-out.tcl' and strips out that flag. + if adk_name == "tsmc16": + from common.streamout_no_uniquify import streamout_no_uniquify + streamout_no_uniquify(iflow) + #----------------------------------------------------------------------- # Graph -- Add nodes #----------------------------------------------------------------------- @@ -241,17 +271,18 @@ def construct(): g.add_step( signoff ) g.add_step( pt_signoff ) g.add_step( genlib ) - g.add_step( pt_genlibdb ) g.add_step( lib2db ) g.add_step( drc ) - g.add_step( drc_pm ) g.add_step( custom_lvs ) g.add_step( lvs ) g.add_step( debugcalibre ) g.add_step( gls_args ) g.add_step( testbench ) g.add_step( vcs_sim ) - g.add_step( lvs_adk ) + if which_soc == "onyx": + g.add_step( pt_genlibdb ) + g.add_step( drc_pm ) + g.add_step( lvs_adk ) if use_e2e: for app in e2e_apps: @@ -276,8 +307,12 @@ def construct(): g.connect_by_name( adk, postroute_hold ) g.connect_by_name( adk, signoff ) g.connect_by_name( adk, drc ) - g.connect_by_name( adk, drc_pm ) - g.connect_by_name( lvs_adk, lvs ) + + if which_soc == "onyx": + g.connect_by_name( adk, drc_pm ) + g.connect_by_name( lvs_adk, lvs ) + else: + g.connect_by_name( adk, lvs ) if use_e2e: for app in e2e_apps: @@ -315,17 +350,17 @@ def construct(): g.connect_by_name( Tile_MemCore, postroute_hold ) g.connect_by_name( Tile_MemCore, signoff ) g.connect_by_name( Tile_MemCore, pt_signoff ) - g.connect_by_name( Tile_MemCore, 
pt_genlibdb ) g.connect_by_name( Tile_MemCore, genlib ) g.connect_by_name( Tile_MemCore, drc ) - g.connect_by_name( Tile_MemCore, drc_pm ) g.connect_by_name( Tile_MemCore, lvs ) # These rules LVS BOX the SRAM macro, so they should # only be used if memory tile is present g.connect_by_name( custom_lvs, lvs ) g.connect_by_name( Tile_MemCore, vcs_sim ) + if which_soc == "onyx": + g.connect_by_name( Tile_MemCore, pt_genlibdb ) + g.connect_by_name( Tile_MemCore, drc_pm ) - # inputs to Tile_PE g.connect_by_name( rtl, Tile_PE ) # outputs from Tile_PE @@ -342,11 +377,12 @@ def construct(): g.connect_by_name( Tile_PE, postroute_hold ) g.connect_by_name( Tile_PE, signoff ) g.connect_by_name( Tile_PE, pt_signoff ) - g.connect_by_name( Tile_PE, pt_genlibdb ) g.connect_by_name( Tile_PE, genlib ) g.connect_by_name( Tile_PE, drc ) - g.connect_by_name( Tile_PE, drc_pm ) g.connect_by_name( Tile_PE, lvs ) + if which_soc == "onyx": + g.connect_by_name( Tile_PE, pt_genlibdb ) + g.connect_by_name( Tile_PE, drc_pm ) #g.connect_by_name( rtl, dc ) #g.connect_by_name( constraints, dc ) @@ -379,7 +415,8 @@ def construct(): g.connect_by_name( iflow, postroute ) g.connect_by_name( iflow, postroute_hold ) g.connect_by_name( iflow, signoff ) - g.connect_by_name( iflow, genlib ) + if which_soc == "onyx": + g.connect_by_name( iflow, genlib ) g.connect_by_name( custom_init, init ) g.connect_by_name( custom_power, power ) @@ -394,19 +431,24 @@ def construct(): g.connect_by_name( postroute, postroute_hold ) g.connect_by_name( postroute_hold, signoff ) g.connect_by_name( signoff, drc ) - g.connect_by_name( signoff, drc_pm ) g.connect_by_name( signoff, lvs ) g.connect(signoff.o('design-merged.gds'), drc.i('design_merged.gds')) - g.connect(signoff.o('design-merged.gds'), drc_pm.i('design_merged.gds')) g.connect(signoff.o('design-merged.gds'), lvs.i('design_merged.gds')) + if which_soc == "onyx": + g.connect_by_name( signoff, drc_pm ) + g.connect(signoff.o('design-merged.gds'), 
drc_pm.i('design_merged.gds')) g.connect_by_name( adk, pt_signoff ) g.connect_by_name( signoff, pt_signoff ) - - g.connect_by_name( adk, pt_genlibdb ) - g.connect_by_name( adk, genlib ) - g.connect_by_name( signoff, pt_genlibdb ) - g.connect_by_name( signoff, genlib ) + + if which_soc == "onyx": + g.connect_by_name( adk, pt_genlibdb ) + g.connect_by_name( adk, genlib ) + g.connect_by_name( signoff, pt_genlibdb ) + g.connect_by_name( signoff, genlib ) + else: + g.connect_by_name( adk, genlib ) + g.connect_by_name( signoff, genlib ) g.connect_by_name( genlib, lib2db ) @@ -415,9 +457,10 @@ def construct(): g.connect_by_name( synth, debugcalibre ) g.connect_by_name( iflow, debugcalibre ) g.connect_by_name( signoff, debugcalibre ) - g.connect_by_name( drc_pm, debugcalibre ) g.connect_by_name( drc, debugcalibre ) g.connect_by_name( lvs, debugcalibre ) + if which_soc == "onyx": + g.connect_by_name( drc_pm, debugcalibre ) g.connect_by_name( adk, vcs_sim ) g.connect_by_name( testbench, vcs_sim ) @@ -432,7 +475,8 @@ def construct(): g.update_params( parameters ) # LVS adk has separate view parameter - lvs_adk.update_params({ 'adk_view' : parameters['lvs_adk_view']}) + if which_soc == "onyx": + lvs_adk.update_params({ 'adk_view' : parameters['lvs_adk_view']}) # Init needs pipeline params for floorplanning init.update_params({ 'pipeline_config_interval': parameters['pipeline_config_interval'] }, True) @@ -454,9 +498,10 @@ def construct(): # pt_genlibdb -- Remove 'report-interface-timing.tcl' beacuse it takes # very long and is not necessary - order = pt_genlibdb.get_param('order') - order.remove( 'write-interface-timing.tcl' ) - pt_genlibdb.update_params( { 'order': order } ) + if which_soc == "onyx": + order = pt_genlibdb.get_param('order') + order.remove( 'write-interface-timing.tcl' ) + pt_genlibdb.update_params( { 'order': order } ) # init -- Add 'dont-touch.tcl' before reporting From f9132a1acc66b971e86a3c061c641eee197d2645 Mon Sep 17 00:00:00 2001 From: steveri Date: 
Fri, 9 Dec 2022 09:11:08 -0800 Subject: [PATCH 33/63] common mflowgen/glb_top/construct.py for amber, onyx --- mflowgen/glb_top/construct.py | 66 +++++++++++++++++++++++++++-------- 1 file changed, 52 insertions(+), 14 deletions(-) diff --git a/mflowgen/glb_top/construct.py b/mflowgen/glb_top/construct.py index ab910cf8d1..b6896595c1 100644 --- a/mflowgen/glb_top/construct.py +++ b/mflowgen/glb_top/construct.py @@ -22,8 +22,14 @@ def construct(): # Parameters #----------------------------------------------------------------------- - adk_name = get_sys_adk() + adk_name = get_sys_adk() # E.g. 'gf12-adk' or 'tsmc16' adk_view = 'multivt' + which_soc = 'onyx' + + # TSMC override(s) + if adk_name == 'tsmc16': + adk_view = 'view-standard' + which_soc = 'amber' parameters = { 'construct_path' : __file__, @@ -54,6 +60,17 @@ def construct(): 'drc_env_setup': 'drcenv-block.sh' } + # TSMC overrides + if adk_name == 'tsmc16': parameters.update({ + 'clock_period' : 1.11, + 'hold_target_slack' : 0.03, + 'use_container' : False, + }) + + # OG TSMC did not specify drc_env_setup + if adk_name == 'tsmc16': + parameters.pop('drc_env_setup') + #----------------------------------------------------------------------- # Create nodes #----------------------------------------------------------------------- @@ -77,10 +94,11 @@ def construct(): custom_init = Step( this_dir + '/custom-init' ) custom_lvs = Step( this_dir + '/custom-lvs-rules' ) custom_power = Step( this_dir + '/../common/custom-power-hierarchical' ) - custom_cts = Step( this_dir + '/custom-cts' ) genlib = Step( this_dir + '/../common/cadence-innovus-genlib' ) lib2db = Step( this_dir + '/../common/synopsys-dc-lib2db' ) - drc_pm = Step( this_dir + '/../common/gf-mentor-calibre-drcplus-pm' ) + if which_soc == 'onyx': + custom_cts = Step( this_dir + '/custom-cts' ) + drc_pm = Step( this_dir + '/../common/gf-mentor-calibre-drcplus-pm' ) # Default steps @@ -153,7 +171,8 @@ def construct(): # Add glb_tile macro inputs to downstream 
nodes pt_signoff.extend_inputs( ['glb_tile_tt.db'] ) - genlib.extend_inputs( ['glb_tile_tt.db'] ) + if which_soc == 'onyx': + genlib.extend_inputs( ['glb_tile_tt.db'] ) # These steps need timing info for glb_tiles tile_steps = \ @@ -174,6 +193,10 @@ def construct(): # Need sram spice file for LVS lvs.extend_inputs( ['glb_tile_sram.spi'] ) + if which_soc == 'amber': + # Need glb_tile for genlib + genlib.extend_inputs( ['glb_tile_tt.lib'] ) + xlist = synth.get_postconditions() xlist = \ [ _ for _ in xlist if 'percent_clock_gated' not in _ ] @@ -183,8 +206,14 @@ def construct(): init.extend_inputs( custom_init.all_outputs() ) power.extend_inputs( custom_power.all_outputs() ) - cts.extend_inputs( custom_cts.all_outputs() ) + if which_soc == 'onyx': + cts.extend_inputs( custom_cts.all_outputs() ) + # TSMC needs streamout *without* the (new) default -uniquify flag + # This python script finds 'stream-out.tcl' and strips out that flag. + if adk_name == "tsmc16": + from common.streamout_no_uniquify import streamout_no_uniquify + streamout_no_uniquify(iflow) #----------------------------------------------------------------------- # Graph -- Add nodes @@ -205,7 +234,8 @@ def construct(): g.add_step( power ) g.add_step( custom_power ) g.add_step( place ) - g.add_step( custom_cts ) + if which_soc == 'onyx': + g.add_step( custom_cts ) g.add_step( cts ) g.add_step( postcts_hold ) g.add_step( route ) @@ -216,7 +246,8 @@ def construct(): g.add_step( genlib ) g.add_step( lib2db ) g.add_step( drc ) - g.add_step( drc_pm ) + if which_soc == 'onyx': + g.add_step( drc_pm ) g.add_step( lvs ) g.add_step( custom_lvs ) g.add_step( debugcalibre ) @@ -243,7 +274,8 @@ def construct(): g.connect_by_name( adk, postroute_hold ) g.connect_by_name( adk, signoff ) g.connect_by_name( adk, drc ) - g.connect_by_name( adk, drc_pm ) + if which_soc == 'onyx': + g.connect_by_name( adk, drc_pm ) g.connect_by_name( adk, lvs ) g.connect_by_name( adk, genlib ) @@ -261,7 +293,8 @@ def construct(): 
g.connect_by_name( glb_tile, pt_signoff ) g.connect_by_name( glb_tile, genlib ) g.connect_by_name( glb_tile, drc ) - g.connect_by_name( glb_tile, drc_pm ) + if which_soc == 'onyx': + g.connect_by_name( glb_tile, drc_pm ) g.connect_by_name( glb_tile, lvs ) g.connect_by_name( rtl, sim_compile ) @@ -294,7 +327,8 @@ def construct(): g.connect_by_name( custom_init, init ) g.connect_by_name( custom_power, power ) - g.connect_by_name( custom_cts, cts ) + if which_soc == 'onyx': + g.connect_by_name( custom_cts, cts ) g.connect_by_name( custom_lvs, lvs ) g.connect_by_name( init, power ) @@ -306,10 +340,12 @@ def construct(): g.connect_by_name( postroute, postroute_hold ) g.connect_by_name( postroute_hold, signoff ) g.connect_by_name( signoff, drc ) - g.connect_by_name( signoff, drc_pm ) + if which_soc == 'onyx': + g.connect_by_name( signoff, drc_pm ) g.connect_by_name( signoff, lvs ) g.connect(signoff.o('design-merged.gds'), drc.i('design_merged.gds')) - g.connect(signoff.o('design-merged.gds'), drc_pm.i('design_merged.gds')) + if which_soc == 'onyx': + g.connect(signoff.o('design-merged.gds'), drc_pm.i('design_merged.gds')) g.connect(signoff.o('design-merged.gds'), lvs.i('design_merged.gds')) g.connect_by_name( adk, pt_signoff ) @@ -340,7 +376,8 @@ def construct(): g.connect_by_name( synth, debugcalibre ) g.connect_by_name( iflow, debugcalibre ) g.connect_by_name( signoff, debugcalibre ) - g.connect_by_name( drc_pm, debugcalibre ) + if which_soc == 'onyx': + g.connect_by_name( drc_pm, debugcalibre ) g.connect_by_name( drc, debugcalibre ) g.connect_by_name( lvs, debugcalibre ) @@ -375,7 +412,8 @@ def construct(): postroute_hold.update_params( { 'hold_target_slack': parameters['hold_target_slack'] }, allow_new=True ) # useful_skew - # cts.update_params( { 'useful_skew': False }, allow_new=True ) + if which_soc == "amber": + cts.update_params( { 'useful_skew': False }, allow_new=True ) return g From 85b863488b8cb056ded0eaca304fabdcc4ffa519 Mon Sep 17 00:00:00 2001 From: 
steveri Date: Fri, 9 Dec 2022 09:57:34 -0800 Subject: [PATCH 34/63] common mflowgen/global_controller/construct.py for amber, onyx --- mflowgen/global_controller/construct.py | 68 ++++++++++++++++++++----- 1 file changed, 54 insertions(+), 14 deletions(-) diff --git a/mflowgen/global_controller/construct.py b/mflowgen/global_controller/construct.py index 5991ad7f99..5e682c951a 100644 --- a/mflowgen/global_controller/construct.py +++ b/mflowgen/global_controller/construct.py @@ -21,8 +21,14 @@ def construct(): # Parameters #----------------------------------------------------------------------- - adk_name = get_sys_adk() + adk_name = get_sys_adk() # E.g. 'gf12-adk' or 'tsmc16' adk_view = 'multivt' + which_soc = 'onyx' + + # TSMC override(s) + if adk_name == 'tsmc16': + adk_view = 'multicorner' + which_soc = 'amber' parameters = { 'construct_path' : __file__, @@ -44,6 +50,15 @@ def construct(): 'drc_env_setup' : 'drcenv-block.sh' } + # TSMC overrides + if adk_name == 'tsmc16': parameters.update({ + 'hold_target_slack' : 0.030, + }) + + # OG TSMC did not specify drc_env_setup + if adk_name == 'tsmc16': + parameters.pop('drc_env_setup') + #----------------------------------------------------------------------- # Create nodes #----------------------------------------------------------------------- @@ -63,8 +78,10 @@ def construct(): custom_power = Step( this_dir + '/../common/custom-power-leaf' ) lib2db = Step( this_dir + '/../common/synopsys-dc-lib2db' ) lib2db = Step( this_dir + '/../common/synopsys-dc-lib2db' ) - drc_pm = Step( this_dir + '/../common/gf-mentor-calibre-drcplus-pm') - drc_mas = Step( this_dir + '/../common/gf-mentor-calibre-drc-mas' ) + if which_soc == "onyx": + drc_pm = Step( this_dir + '/../common/gf-mentor-calibre-drcplus-pm') + drc_mas = Step( this_dir + '/../common/gf-mentor-calibre-drc-mas' ) + # Default steps @@ -82,7 +99,10 @@ def construct(): postroute_hold = Step( 'cadence-innovus-postroute_hold',default=True ) signoff = Step( 
'cadence-innovus-signoff', default=True ) pt_signoff = Step( 'synopsys-pt-timing-signoff', default=True ) - genlib = Step( 'cadence-innovus-genlib', default=True ) + if which_soc == "onyx": + genlib = Step( 'cadence-innovus-genlib', default=True ) + else: + genlib = Step( 'cadence-genus-genlib', default=True ) if which("calibre") is not None: drc = Step( 'mentor-calibre-drc', default=True ) lvs = Step( 'mentor-calibre-lvs', default=True ) @@ -96,6 +116,12 @@ def construct(): init.extend_inputs( custom_init.all_outputs() ) power.extend_inputs( custom_power.all_outputs() ) + # TSMC needs streamout *without* the (new) default -uniquify flag + # This python script finds 'stream-out.tcl' and strips out that flag. + if adk_name == "tsmc16": + from common.streamout_no_uniquify import streamout_no_uniquify + streamout_no_uniquify(iflow) + #----------------------------------------------------------------------- # Graph -- Add nodes #----------------------------------------------------------------------- @@ -120,8 +146,9 @@ def construct(): g.add_step( genlib ) g.add_step( lib2db ) g.add_step( drc ) - g.add_step( drc_pm ) - g.add_step( drc_mas ) + if which_soc == "onyx": + g.add_step( drc_pm ) + g.add_step( drc_mas ) g.add_step( lvs ) g.add_step( debugcalibre ) @@ -143,8 +170,9 @@ def construct(): g.connect_by_name( adk, postroute_hold ) g.connect_by_name( adk, signoff ) g.connect_by_name( adk, drc ) - g.connect_by_name( adk, drc_pm ) - g.connect_by_name( adk, drc_mas ) + if which_soc == "onyx": + g.connect_by_name( adk, drc_pm ) + g.connect_by_name( adk, drc_mas ) g.connect_by_name( adk, lvs ) g.connect_by_name( rtl, synth ) @@ -165,7 +193,8 @@ def construct(): g.connect_by_name( iflow, postroute ) g.connect_by_name( iflow, postroute_hold ) g.connect_by_name( iflow, signoff ) - g.connect_by_name( iflow, genlib ) + if which_soc == "onyx": + g.connect_by_name( iflow, genlib ) g.connect_by_name( custom_init, init ) g.connect_by_name( custom_power, power ) @@ -179,12 +208,14 @@ 
def construct(): g.connect_by_name( postroute, postroute_hold ) g.connect_by_name( postroute_hold, signoff ) g.connect_by_name( signoff, drc ) - g.connect_by_name( signoff, drc_pm ) - g.connect_by_name( signoff, drc_mas ) + if which_soc == "onyx": + g.connect_by_name( signoff, drc_pm ) + g.connect_by_name( signoff, drc_mas ) g.connect_by_name( signoff, lvs ) g.connect(signoff.o('design-merged.gds'), drc.i('design_merged.gds')) - g.connect(signoff.o('design-merged.gds'), drc_pm.i('design_merged.gds')) - g.connect(signoff.o('design-merged.gds'), drc_mas.i('design_merged.gds')) + if which_soc == "onyx": + g.connect(signoff.o('design-merged.gds'), drc_pm.i('design_merged.gds')) + g.connect(signoff.o('design-merged.gds'), drc_mas.i('design_merged.gds')) g.connect(signoff.o('design-merged.gds'), lvs.i('design_merged.gds')) g.connect_by_name( signoff, genlib ) @@ -199,8 +230,9 @@ def construct(): g.connect_by_name( synth, debugcalibre ) g.connect_by_name( iflow, debugcalibre ) g.connect_by_name( signoff, debugcalibre ) - g.connect_by_name( drc_pm, debugcalibre ) g.connect_by_name( drc, debugcalibre ) + if which_soc == "onyx": + g.connect_by_name( drc_pm, debugcalibre ) g.connect_by_name( lvs, debugcalibre ) #----------------------------------------------------------------------- @@ -213,6 +245,14 @@ def construct(): # steps, we modify the order parameter for that node which determines # which scripts get run and when they get run. 
+ if which_soc == "amber": + # init -- Add 'add-endcaps-welltaps.tcl' after 'floorplan.tcl' + + order = init.get_param('order') # get the default script run order + floorplan_idx = order.index( 'floorplan.tcl' ) # find floorplan.tcl + order.insert( floorplan_idx + 1, 'add-endcaps-welltaps.tcl' ) # add here + init.update_params( { 'order': order } ) + # Add density target parameter init.update_params( { 'core_density_target': parameters['core_density_target'] }, True ) From bc3bc37937c240ea02d8cccb04f9be01400858b7 Mon Sep 17 00:00:00 2001 From: steveri Date: Fri, 9 Dec 2022 11:08:03 -0800 Subject: [PATCH 35/63] undid an error --- mflowgen/full_chip/construct.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mflowgen/full_chip/construct.py b/mflowgen/full_chip/construct.py index a11f7e38e2..aa6ce2f60d 100644 --- a/mflowgen/full_chip/construct.py +++ b/mflowgen/full_chip/construct.py @@ -145,7 +145,6 @@ def construct(): parameters.pop('use_local_garnet') parameters.pop('drc_env_setup') parameters.pop('antenna_drc_env_setup') - parameters.pop('lvs_adk_view') # 'sram_2' and 'guarding' are onyx/GF-only parameters (for now) From 0866a07ef04325765aca9d35bacde2be7200f1b7 Mon Sep 17 00:00:00 2001 From: steveri Date: Fri, 9 Dec 2022 11:37:59 -0800 Subject: [PATCH 36/63] undid an(other?) error --- mflowgen/full_chip/construct.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mflowgen/full_chip/construct.py b/mflowgen/full_chip/construct.py index aa6ce2f60d..774ea83415 100644 --- a/mflowgen/full_chip/construct.py +++ b/mflowgen/full_chip/construct.py @@ -140,9 +140,8 @@ def construct(): 'hold_target_slack' : 0.060, }) - # OG TSMC did not set use_local_garnet etc. + # OG TSMC did not set drc_env_setup etc. 
if adk_name == 'tsmc16': - parameters.pop('use_local_garnet') parameters.pop('drc_env_setup') parameters.pop('antenna_drc_env_setup') From d03ca247c33258e1ab1aa3b026fe3b1f14d313d4 Mon Sep 17 00:00:00 2001 From: steveri Date: Tue, 13 Dec 2022 08:57:39 -0800 Subject: [PATCH 37/63] final tweak on fc/construct.py -- this one passed full build --- mflowgen/full_chip/construct.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mflowgen/full_chip/construct.py b/mflowgen/full_chip/construct.py index 774ea83415..6aaf935cf8 100644 --- a/mflowgen/full_chip/construct.py +++ b/mflowgen/full_chip/construct.py @@ -142,6 +142,7 @@ def construct(): }) # OG TSMC did not set drc_env_setup etc. if adk_name == 'tsmc16': + parameters.pop('use_local_garnet') parameters.pop('drc_env_setup') parameters.pop('antenna_drc_env_setup') From a2f4ad990a191fb50317e513df9af9e47757d0ba Mon Sep 17 00:00:00 2001 From: steveri Date: Tue, 13 Dec 2022 09:02:39 -0800 Subject: [PATCH 38/63] added a check to early-out TSMC build failures --- .buildkite/pipeline_fullchip.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.buildkite/pipeline_fullchip.yml b/.buildkite/pipeline_fullchip.yml index 4295540580..74fbb57564 100644 --- a/.buildkite/pipeline_fullchip.yml +++ b/.buildkite/pipeline_fullchip.yml @@ -35,7 +35,12 @@ steps: commands: - 'source mflowgen/bin/setup-buildkite.sh --dir $$GOLD --need_space 100G; echo "--- MAKE RTL"; set -o pipefail; - make rtl |& tee make-rtl.log' + make rtl |& tee make-rtl.log; + set -x; + grep "module precoder_32_1" 10-rtl/outputs/design.v | sort; + grep "module precoder_32_19" 10-rtl/outputs/design.v && echo FAIL || echo PASS; + grep "module precoder_32_19" 10-rtl/outputs/design.v && exit 13 || exit 0; +' - wait: ~ # Note: "echo exit 13" prevents hang at genus/innovus prompt, allows clean fail From 759b4fa28657518982b4a190f76aa8c6478d0d68 Mon Sep 17 00:00:00 2001 From: steveri Date: Tue, 13 Dec 2022 09:05:35 -0800 Subject: [PATCH 39/63] buildkite 
pipeline tweaks --- .buildkite/pipelines/fullchip_pe.yml | 5 -- .buildkite/pipelines/fullchip_rtl_only.yml | 31 ++++------ .buildkite/pipelines/mem_tile_only.yml | 2 +- .buildkite/pipelines/rtl_only.yml | 66 +++++++++++++++------- 4 files changed, 60 insertions(+), 44 deletions(-) diff --git a/.buildkite/pipelines/fullchip_pe.yml b/.buildkite/pipelines/fullchip_pe.yml index 6cedac09dd..df9645d49a 100644 --- a/.buildkite/pipelines/fullchip_pe.yml +++ b/.buildkite/pipelines/fullchip_pe.yml @@ -1,8 +1,3 @@ -# run PE LVS ONLY - -# agents: { jobsize: "hours" } -agents: { queue: "papers" } - ############################################################################## # Use this to test a specific branch/commit: # Add to env: diff --git a/.buildkite/pipelines/fullchip_rtl_only.yml b/.buildkite/pipelines/fullchip_rtl_only.yml index 4a132d6985..9604f38cb1 100644 --- a/.buildkite/pipelines/fullchip_rtl_only.yml +++ b/.buildkite/pipelines/fullchip_rtl_only.yml @@ -1,35 +1,28 @@ -agents: { queue: "papers" } +# Run full_chip setup stage only env: - GOLD: /build/rtl.${BUILDKITE_BUILD_NUMBER}/full_chip - LVS_CHECK: ./mflowgen/bin/buildcheck.sh --lvs + # BDIR: . - # To support postroute_hold retries - PRH : 'eval $$GARNET_HOME/.buildkite/bin/prh.sh' + # Can optionally save results in /build ... + BDIR: /build/mem${BUILDKITE_BUILD_NUMBER} - # ($FAIL|$UPLOAD): Inserts a "fail" bubble in the buildkite pipeline log - FAIL1 : 'echo steps : [ { label : FAIL->retry1 , command : exit } ]' - FAIL2 : 'echo steps : [ { label : FAIL->retry2 , command : exit } ]' - UPLOAD : 'buildkite-agent pipeline upload' + # OVERRIDE_MFLOWGEN_BRANCH: silent_fail - # Set slack to -0.3 to make postroute_hold much faster. - # Default targ slack for full_chip @ 0.06 takes 6 hours atm. 
- # With hack target -0.3, should be about 2.5 hours (saves 3.5 hours) - # MFLOWGEN_PARM_OVERRIDE_hold_target_slack : -0.3 +steps: - # Can use this to change target mflowgen branch - # OVERRIDE_MFLOWGEN_BRANCH: glob-prob +############################################################################## +# INDIVIDUAL TILE RUNS - Mem tile only steps: -- label: 'setup' +- label: 'fc setup' commands: - - 'source mflowgen/bin/setup-buildkite.sh --dir $$GOLD; + - 'source mflowgen/bin/setup-buildkite.sh --dir $$BDIR; mflowgen run --design $$GARNET_HOME/mflowgen/full_chip' - wait: ~ -- label: 'rtl' +- label: 'fc rtl' commands: - - 'source mflowgen/bin/setup-buildkite.sh --dir $$GOLD --need_space 100G; + - 'source mflowgen/bin/setup-buildkite.sh --dir $$BDIR --need_space 100G; echo "--- MAKE RTL"; set -o pipefail; make rtl |& tee make-rtl.log' - wait: ~ diff --git a/.buildkite/pipelines/mem_tile_only.yml b/.buildkite/pipelines/mem_tile_only.yml index f72c8fd4b0..55f4aecf75 100644 --- a/.buildkite/pipelines/mem_tile_only.yml +++ b/.buildkite/pipelines/mem_tile_only.yml @@ -15,7 +15,7 @@ steps: mflowgen run --design $$GARNET_HOME/mflowgen/tile_array' - wait: ~ -- label: 'MemTile' +- label: 'MemTile 2hr' commands: - 'source mflowgen/bin/setup-buildkite.sh --dir $$BDIR --need_space 30G; set -o pipefail; diff --git a/.buildkite/pipelines/rtl_only.yml b/.buildkite/pipelines/rtl_only.yml index d5af4c099a..facf621abf 100644 --- a/.buildkite/pipelines/rtl_only.yml +++ b/.buildkite/pipelines/rtl_only.yml @@ -1,28 +1,56 @@ -agents: { jobsize: "hours" } +# ############################################################################## +# # Use this to test a specific branch/commit: +# # Add to env: +# # NOV11: ee214ef77b827f969e4b5f056f5d866cf391be7a +# # Add to commands: +# # - pwd; git branch; git checkout $$NOV11 +# +# ############################################################################## +# # Note: "echo exit 13" prevents hang at genus/innovus prompt +# env: +# TEST: 'echo exit 
13 | mflowgen/test/test_module.sh' +# +# steps: +# +# ############################################################################## +# # INDIVIDUAL TILE RUNS +# # Builds in dir e.g. mflowgen/full_chip/19-tile_array/16-Tile_MemCore +# +# - label: 'FULL_CHIP RTL' +# commands: +# - $TEST --need_space 30G full_chip --steps rtl --debug +# - 'rtl_dir=full_chip/*-rtl; +# if grep Traceback $$rtl_dir/mflowgen-run.log; +# then echo oh no failed silently oh no; +# else exit 0; +# fi' -############################################################################## -# Use this to test a specific branch/commit: -# Add to env: -# NOV11: ee214ef77b827f969e4b5f056f5d866cf391be7a -# Add to commands: -# - pwd; git branch; git checkout $$NOV11 - -############################################################################## -# Note: "echo exit 13" prevents hang at genus/innovus prompt env: TEST: 'echo exit 13 | mflowgen/test/test_module.sh' + SETUP: 'source mflowgen/bin/setup-buildkite.sh' + TEST_MODULE_SBFLAGS: '--skip_mflowgen' + BUILD: /build/fcrtl-${BUILDKITE_BUILD_NUMBER} + RTL: /build/fcrtl-${BUILDKITE_BUILD_NUMBER}/full_chip/10-rtl/outputs/design.v + RTL_DOCKER_IMAGE: 'stanfordaha/garnet@sha256:dd688c7b98b034dadea9f7177781c97a7a030d737ae4751a78dbb97ae8b72af4' steps: ############################################################################## -# INDIVIDUAL TILE RUNS -# Builds in dir e.g. 
mflowgen/full_chip/19-tile_array/16-Tile_MemCore +# COMMON SETUP to initialize mflowgen + +- label: 'setup 2m' + commands: + - ' + echo "--- SETUP"; + set -o pipefail; + $$SETUP --dir .; + ' +- wait -- label: 'FULL_CHIP RTL' +- label: 'RTL only' commands: - - $TEST --need_space 30G full_chip --steps rtl --debug - - 'rtl_dir=full_chip/*-rtl; - if grep Traceback $$rtl_dir/mflowgen-run.log; - then echo oh no failed silently oh no; - else exit 0; - fi' + - $TEST --build_dir $BUILD full_chip --steps rtl --debug + - ls -l $$RTL + - grep 'module precoder_32_1' $$RTL | sort + - grep 'module precoder_32_19' $$RTL && echo FAIL || echo PASS + - grep 'module precoder_32_19' $$RTL && exit 13 || exit 0 From 6007a4b74613c2c33dccb555d94b96bbdb47282b Mon Sep 17 00:00:00 2001 From: steveri Date: Tue, 13 Dec 2022 09:17:26 -0800 Subject: [PATCH 40/63] updated vs. latest garnet.py from spVspV branch --- garnet.py | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/garnet.py b/garnet.py index 88dbe307ba..14204fd0cb 100644 --- a/garnet.py +++ b/garnet.py @@ -429,9 +429,6 @@ def load_netlist(self, app, load_only, pipeline_input_broadcasts, input_broadcast_branch_factor, input_broadcast_max_leaves) - # print("NETLIST INFO") - # print(netlist_info) - mem_remap = None pe_remap = None @@ -443,27 +440,17 @@ def load_netlist(self, app, load_only, pipeline_input_broadcasts, pnr_tag = pnr_tag.tag_name if pnr_tag == "m" and mem_remap is None: mem_remap = actual_core.get_port_remap() - # print("MEM PORT REMAP!") - # print(mem_remap) elif pnr_tag == "p" and pe_remap is None: pe_remap = actual_core.get_port_remap() elif mem_remap is not None and pe_remap is not None: break - # print(mem_remap) - # print(pe_remap) - - # Remap here... - # print("Actual netlist...") - # print(netlist_info['netlist']) for netlist_id, connections_list in netlist_info['netlist'].items(): for idx, connection in enumerate(connections_list): tag_, pin_ = connection if tag_[0] == 'm': # get mode... 
metadata = netlist_info['id_to_metadata'][tag_] - # print("metadata...") - # print(metadata) mode = "UB" if 'stencil_valid' in metadata["config"]: mode = 'stencil_valid' @@ -472,28 +459,17 @@ def load_netlist(self, app, load_only, pipeline_input_broadcasts, # Actually use wr addr for rom mode... hack_remap = { 'addr_in_0': 'wr_addr_in', - #'addr_in_0': 'rd_addr_in', 'ren_in_0': 'ren', 'data_out_0': 'data_out' } assert pin_ in hack_remap pin_ = hack_remap[pin_] - # print("SHOWING REMAP") - # print(f"MODE {mode}") - # print(mem_remap[mode]) - # print(f"remapping pin {pin_} to {mem_remap[mode][pin_]}") pin_remap = mem_remap[mode][pin_] connections_list[idx] = (tag_, pin_remap) elif tag_[0] == 'p': pin_remap = pe_remap['alu'][pin_] connections_list[idx] = (tag_, pin_remap) netlist_info['netlist'][netlist_id] = connections_list - # Remap the memtile or pondtile pins - #core = self.interconnect.placement[tag_] - #print(core) - #if tag_[0] == 'm - - # print(netlist_info['netlist']) if not self.amber_pond: # temporally remapping of port names for the new Pond From 96e811518199cf37fb63c9dc011a9b0e6ee229d1 Mon Sep 17 00:00:00 2001 From: steveri Date: Wed, 14 Dec 2022 09:14:43 -0800 Subject: [PATCH 41/63] emergency(?) 
fix for CI --- .buildkite/pipelines/rtl_only.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/pipelines/rtl_only.yml b/.buildkite/pipelines/rtl_only.yml index facf621abf..f1f8a6a3e1 100644 --- a/.buildkite/pipelines/rtl_only.yml +++ b/.buildkite/pipelines/rtl_only.yml @@ -31,7 +31,7 @@ env: TEST_MODULE_SBFLAGS: '--skip_mflowgen' BUILD: /build/fcrtl-${BUILDKITE_BUILD_NUMBER} RTL: /build/fcrtl-${BUILDKITE_BUILD_NUMBER}/full_chip/10-rtl/outputs/design.v - RTL_DOCKER_IMAGE: 'stanfordaha/garnet@sha256:dd688c7b98b034dadea9f7177781c97a7a030d737ae4751a78dbb97ae8b72af4' + # RTL_DOCKER_IMAGE: 'stanfordaha/garnet@sha256:dd688c7b98b034dadea9f7177781c97a7a030d737ae4751a78dbb97ae8b72af4' steps: From 21fe6f2045699cf00907dfe1b7b57f82332a404b Mon Sep 17 00:00:00 2001 From: steveri Date: Wed, 14 Dec 2022 09:24:15 -0800 Subject: [PATCH 42/63] try the previously-passing version of rtl_only.yml --- .buildkite/pipelines/rtl_only.yml | 66 +++++++++---------------------- 1 file changed, 19 insertions(+), 47 deletions(-) diff --git a/.buildkite/pipelines/rtl_only.yml b/.buildkite/pipelines/rtl_only.yml index f1f8a6a3e1..d5af4c099a 100644 --- a/.buildkite/pipelines/rtl_only.yml +++ b/.buildkite/pipelines/rtl_only.yml @@ -1,56 +1,28 @@ -# ############################################################################## -# # Use this to test a specific branch/commit: -# # Add to env: -# # NOV11: ee214ef77b827f969e4b5f056f5d866cf391be7a -# # Add to commands: -# # - pwd; git branch; git checkout $$NOV11 -# -# ############################################################################## -# # Note: "echo exit 13" prevents hang at genus/innovus prompt -# env: -# TEST: 'echo exit 13 | mflowgen/test/test_module.sh' -# -# steps: -# -# ############################################################################## -# # INDIVIDUAL TILE RUNS -# # Builds in dir e.g. 
mflowgen/full_chip/19-tile_array/16-Tile_MemCore -# -# - label: 'FULL_CHIP RTL' -# commands: -# - $TEST --need_space 30G full_chip --steps rtl --debug -# - 'rtl_dir=full_chip/*-rtl; -# if grep Traceback $$rtl_dir/mflowgen-run.log; -# then echo oh no failed silently oh no; -# else exit 0; -# fi' +agents: { jobsize: "hours" } +############################################################################## +# Use this to test a specific branch/commit: +# Add to env: +# NOV11: ee214ef77b827f969e4b5f056f5d866cf391be7a +# Add to commands: +# - pwd; git branch; git checkout $$NOV11 + +############################################################################## +# Note: "echo exit 13" prevents hang at genus/innovus prompt env: TEST: 'echo exit 13 | mflowgen/test/test_module.sh' - SETUP: 'source mflowgen/bin/setup-buildkite.sh' - TEST_MODULE_SBFLAGS: '--skip_mflowgen' - BUILD: /build/fcrtl-${BUILDKITE_BUILD_NUMBER} - RTL: /build/fcrtl-${BUILDKITE_BUILD_NUMBER}/full_chip/10-rtl/outputs/design.v - # RTL_DOCKER_IMAGE: 'stanfordaha/garnet@sha256:dd688c7b98b034dadea9f7177781c97a7a030d737ae4751a78dbb97ae8b72af4' steps: ############################################################################## -# COMMON SETUP to initialize mflowgen - -- label: 'setup 2m' - commands: - - ' - echo "--- SETUP"; - set -o pipefail; - $$SETUP --dir .; - ' -- wait +# INDIVIDUAL TILE RUNS +# Builds in dir e.g. 
mflowgen/full_chip/19-tile_array/16-Tile_MemCore -- label: 'RTL only' +- label: 'FULL_CHIP RTL' commands: - - $TEST --build_dir $BUILD full_chip --steps rtl --debug - - ls -l $$RTL - - grep 'module precoder_32_1' $$RTL | sort - - grep 'module precoder_32_19' $$RTL && echo FAIL || echo PASS - - grep 'module precoder_32_19' $$RTL && exit 13 || exit 0 + - $TEST --need_space 30G full_chip --steps rtl --debug + - 'rtl_dir=full_chip/*-rtl; + if grep Traceback $$rtl_dir/mflowgen-run.log; + then echo oh no failed silently oh no; + else exit 0; + fi' From 570850e6a8548c0d68150872eec7578f30f09240 Mon Sep 17 00:00:00 2001 From: steveri Date: Wed, 14 Dec 2022 09:48:28 -0800 Subject: [PATCH 43/63] try with local garnet --- mflowgen/full_chip/construct.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mflowgen/full_chip/construct.py b/mflowgen/full_chip/construct.py index 6aaf935cf8..a34e8264fd 100644 --- a/mflowgen/full_chip/construct.py +++ b/mflowgen/full_chip/construct.py @@ -76,7 +76,6 @@ def construct(): 'array_height' : 16, 'num_glb_tiles' : 16, 'interconnect_only' : False, - 'use_local_garnet' : False, # glb tile memory size (unit: KB) # 'glb_tile_mem_size' : 64, # 64x16 => 1M global buffer 'glb_tile_mem_size' : 256, # 256*16 => 4M global buffer From 672cecb554a6b88b41a195043971237cdfdad8e1 Mon Sep 17 00:00:00 2001 From: steveri Date: Wed, 14 Dec 2022 10:25:37 -0800 Subject: [PATCH 44/63] fixed a thing maybe --- global_buffer/design/glb_bank_sram_gen.py | 2 +- mflowgen/full_chip/construct.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/global_buffer/design/glb_bank_sram_gen.py b/global_buffer/design/glb_bank_sram_gen.py index d723bf3893..2fb826c7cf 100644 --- a/global_buffer/design/glb_bank_sram_gen.py +++ b/global_buffer/design/glb_bank_sram_gen.py @@ -10,7 +10,7 @@ def __init__(self, addr_width, _params: GlobalBufferParams): # FIXME this TSMC/GF fix might goo away after the smoke # clears, but for now it gets us closer to a common master - if 
self._params.process == "TSMC": + if _params.process == "TSMC": super().__init__("glb_bank_sram_gen") elif self._params.process == "GF": super().__init__(f"glb_bank_sram_gen_{addr_width}") diff --git a/mflowgen/full_chip/construct.py b/mflowgen/full_chip/construct.py index a34e8264fd..6aaf935cf8 100644 --- a/mflowgen/full_chip/construct.py +++ b/mflowgen/full_chip/construct.py @@ -76,6 +76,7 @@ def construct(): 'array_height' : 16, 'num_glb_tiles' : 16, 'interconnect_only' : False, + 'use_local_garnet' : False, # glb tile memory size (unit: KB) # 'glb_tile_mem_size' : 64, # 64x16 => 1M global buffer 'glb_tile_mem_size' : 256, # 256*16 => 4M global buffer From 643a9a2676d6d613a3c1a85fad353a0a78a812d4 Mon Sep 17 00:00:00 2001 From: steveri Date: Wed, 14 Dec 2022 10:58:25 -0800 Subject: [PATCH 45/63] fixed a thing maybe 2 --- global_buffer/design/glb_bank_sram_gen.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/global_buffer/design/glb_bank_sram_gen.py b/global_buffer/design/glb_bank_sram_gen.py index 2fb826c7cf..37ca1b2bc2 100644 --- a/global_buffer/design/glb_bank_sram_gen.py +++ b/global_buffer/design/glb_bank_sram_gen.py @@ -12,7 +12,7 @@ def __init__(self, addr_width, _params: GlobalBufferParams): # clears, but for now it gets us closer to a common master if _params.process == "TSMC": super().__init__("glb_bank_sram_gen") - elif self._params.process == "GF": + elif _params.process == "GF": super().__init__(f"glb_bank_sram_gen_{addr_width}") self._params = _params From c38db6363c36a3b2c02c575a94d4fced53b10a3a Mon Sep 17 00:00:00 2001 From: steveri Date: Wed, 14 Dec 2022 11:08:19 -0800 Subject: [PATCH 46/63] try again with previously-merged rtl-only pipeline --- .buildkite/pipelines/rtl_only.yml | 66 ++++++++++++++++++++++--------- 1 file changed, 47 insertions(+), 19 deletions(-) diff --git a/.buildkite/pipelines/rtl_only.yml b/.buildkite/pipelines/rtl_only.yml index d5af4c099a..facf621abf 100644 --- a/.buildkite/pipelines/rtl_only.yml +++ 
b/.buildkite/pipelines/rtl_only.yml @@ -1,28 +1,56 @@ -agents: { jobsize: "hours" } +# ############################################################################## +# # Use this to test a specific branch/commit: +# # Add to env: +# # NOV11: ee214ef77b827f969e4b5f056f5d866cf391be7a +# # Add to commands: +# # - pwd; git branch; git checkout $$NOV11 +# +# ############################################################################## +# # Note: "echo exit 13" prevents hang at genus/innovus prompt +# env: +# TEST: 'echo exit 13 | mflowgen/test/test_module.sh' +# +# steps: +# +# ############################################################################## +# # INDIVIDUAL TILE RUNS +# # Builds in dir e.g. mflowgen/full_chip/19-tile_array/16-Tile_MemCore +# +# - label: 'FULL_CHIP RTL' +# commands: +# - $TEST --need_space 30G full_chip --steps rtl --debug +# - 'rtl_dir=full_chip/*-rtl; +# if grep Traceback $$rtl_dir/mflowgen-run.log; +# then echo oh no failed silently oh no; +# else exit 0; +# fi' -############################################################################## -# Use this to test a specific branch/commit: -# Add to env: -# NOV11: ee214ef77b827f969e4b5f056f5d866cf391be7a -# Add to commands: -# - pwd; git branch; git checkout $$NOV11 - -############################################################################## -# Note: "echo exit 13" prevents hang at genus/innovus prompt env: TEST: 'echo exit 13 | mflowgen/test/test_module.sh' + SETUP: 'source mflowgen/bin/setup-buildkite.sh' + TEST_MODULE_SBFLAGS: '--skip_mflowgen' + BUILD: /build/fcrtl-${BUILDKITE_BUILD_NUMBER} + RTL: /build/fcrtl-${BUILDKITE_BUILD_NUMBER}/full_chip/10-rtl/outputs/design.v + RTL_DOCKER_IMAGE: 'stanfordaha/garnet@sha256:dd688c7b98b034dadea9f7177781c97a7a030d737ae4751a78dbb97ae8b72af4' steps: ############################################################################## -# INDIVIDUAL TILE RUNS -# Builds in dir e.g. 
mflowgen/full_chip/19-tile_array/16-Tile_MemCore +# COMMON SETUP to initialize mflowgen + +- label: 'setup 2m' + commands: + - ' + echo "--- SETUP"; + set -o pipefail; + $$SETUP --dir .; + ' +- wait -- label: 'FULL_CHIP RTL' +- label: 'RTL only' commands: - - $TEST --need_space 30G full_chip --steps rtl --debug - - 'rtl_dir=full_chip/*-rtl; - if grep Traceback $$rtl_dir/mflowgen-run.log; - then echo oh no failed silently oh no; - else exit 0; - fi' + - $TEST --build_dir $BUILD full_chip --steps rtl --debug + - ls -l $$RTL + - grep 'module precoder_32_1' $$RTL | sort + - grep 'module precoder_32_19' $$RTL && echo FAIL || echo PASS + - grep 'module precoder_32_19' $$RTL && exit 13 || exit 0 From 16b2b861e3cea2db2bfd24148dd3347acaa47d56 Mon Sep 17 00:00:00 2001 From: steveri Date: Wed, 14 Dec 2022 11:16:35 -0800 Subject: [PATCH 47/63] final merge-tweak of this stupid file --- global_buffer/design/glb_bank_sram_gen.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/global_buffer/design/glb_bank_sram_gen.py b/global_buffer/design/glb_bank_sram_gen.py index 37ca1b2bc2..50d2660114 100644 --- a/global_buffer/design/glb_bank_sram_gen.py +++ b/global_buffer/design/glb_bank_sram_gen.py @@ -8,7 +8,7 @@ class GlbBankSramGen(Generator): def __init__(self, addr_width, _params: GlobalBufferParams): - # FIXME this TSMC/GF fix might goo away after the smoke + # FIXME this TSMC/GF fix might go away after the smoke # clears, but for now it gets us closer to a common master if _params.process == "TSMC": super().__init__("glb_bank_sram_gen") From 234a672643f4fa571f604e9b5f739c446f4bea99 Mon Sep 17 00:00:00 2001 From: steveri Date: Thu, 15 Dec 2022 08:00:37 -0800 Subject: [PATCH 48/63] merged latest set of files from $smd --- .buildkite/pipeline_fullchip.yml | 1 + .buildkite/pipelines/pmg.yml | 1 + .buildkite/pipelines/rtl_only.yml | 15 +++-- global_buffer/Makefile | 14 +++++ .../testvectors_amber/gen_glb_test.py | 56 +++++++++++++++++++ 
global_buffer/testvectors_amber/test01.txt | 15 +++++ global_buffer/testvectors_amber/test02.txt | 15 +++++ global_buffer/testvectors_amber/test03.txt | 27 +++++++++ global_buffer/testvectors_amber/test04.txt | 15 +++++ global_buffer/testvectors_amber/test05.txt | 15 +++++ global_buffer/testvectors_amber/test06.txt | 27 +++++++++ global_buffer/testvectors_amber/test07.txt | 15 +++++ global_buffer/testvectors_amber/test08.txt | 15 +++++ global_buffer/testvectors_amber/test09.txt | 9 +++ global_buffer/testvectors_amber/test10.txt | 9 +++ global_buffer/testvectors_amber/test11.txt | 15 +++++ global_buffer/testvectors_amber/test12.txt | 18 ++++++ global_buffer/testvectors_amber/test13.txt | 18 ++++++ 18 files changed, 294 insertions(+), 6 deletions(-) create mode 100644 global_buffer/testvectors_amber/gen_glb_test.py create mode 100644 global_buffer/testvectors_amber/test01.txt create mode 100644 global_buffer/testvectors_amber/test02.txt create mode 100644 global_buffer/testvectors_amber/test03.txt create mode 100644 global_buffer/testvectors_amber/test04.txt create mode 100644 global_buffer/testvectors_amber/test05.txt create mode 100644 global_buffer/testvectors_amber/test06.txt create mode 100644 global_buffer/testvectors_amber/test07.txt create mode 100644 global_buffer/testvectors_amber/test08.txt create mode 100644 global_buffer/testvectors_amber/test09.txt create mode 100644 global_buffer/testvectors_amber/test10.txt create mode 100644 global_buffer/testvectors_amber/test11.txt create mode 100644 global_buffer/testvectors_amber/test12.txt create mode 100644 global_buffer/testvectors_amber/test13.txt diff --git a/.buildkite/pipeline_fullchip.yml b/.buildkite/pipeline_fullchip.yml index 74fbb57564..e353cd3040 100644 --- a/.buildkite/pipeline_fullchip.yml +++ b/.buildkite/pipeline_fullchip.yml @@ -2,6 +2,7 @@ # agents: { queue: "papers" } env: + WHICH_SOC: amber GOLD: /build/gold.${BUILDKITE_BUILD_NUMBER}/full_chip LVS_CHECK: ./mflowgen/bin/buildcheck.sh --lvs 
diff --git a/.buildkite/pipelines/pmg.yml b/.buildkite/pipelines/pmg.yml index ad9efd7983..d63b9741f9 100644 --- a/.buildkite/pipelines/pmg.yml +++ b/.buildkite/pipelines/pmg.yml @@ -10,6 +10,7 @@ # ------------------------------------------------- env: + WHICH_SOC: amber SETUP: source mflowgen/bin/setup-buildkite.sh # Env var used by test_module.sh :( diff --git a/.buildkite/pipelines/rtl_only.yml b/.buildkite/pipelines/rtl_only.yml index facf621abf..8dfe0b1f79 100644 --- a/.buildkite/pipelines/rtl_only.yml +++ b/.buildkite/pipelines/rtl_only.yml @@ -26,12 +26,14 @@ # fi' env: + # BUILD: /build/fcrtl-${BUILDKITE_BUILD_NUMBER} + BUILD: . + TEST: 'echo exit 13 | mflowgen/test/test_module.sh' SETUP: 'source mflowgen/bin/setup-buildkite.sh' TEST_MODULE_SBFLAGS: '--skip_mflowgen' - BUILD: /build/fcrtl-${BUILDKITE_BUILD_NUMBER} - RTL: /build/fcrtl-${BUILDKITE_BUILD_NUMBER}/full_chip/10-rtl/outputs/design.v RTL_DOCKER_IMAGE: 'stanfordaha/garnet@sha256:dd688c7b98b034dadea9f7177781c97a7a030d737ae4751a78dbb97ae8b72af4' + WHICH_SOC: amber steps: @@ -50,7 +52,8 @@ steps: - label: 'RTL only' commands: - $TEST --build_dir $BUILD full_chip --steps rtl --debug - - ls -l $$RTL - - grep 'module precoder_32_1' $$RTL | sort - - grep 'module precoder_32_19' $$RTL && echo FAIL || echo PASS - - grep 'module precoder_32_19' $$RTL && exit 13 || exit 0 + - 'RTL=$BUILD/full_chip/*-rtl/outputs/design.v; + grep "module precoder_32_1" $$RTL | sort; + grep "module precoder_32_19" $$RTL && echo FAIL || echo PASS; + grep "module precoder_32_19" $$RTL && exit 13 || exit 0; + ' diff --git a/global_buffer/Makefile b/global_buffer/Makefile index f0e73e3893..a97e621071 100644 --- a/global_buffer/Makefile +++ b/global_buffer/Makefile @@ -45,8 +45,22 @@ RUN_LOG ?= run.log HEADER_FILES ?= header/global_buffer_param.svh header/glb.svh DESIGN_FILES ?= global_buffer.sv systemRDL/output/glb_pio.sv systemRDL/output/glb_jrdl_decode.sv systemRDL/output/glb_jrdl_logic.sv NETLIST_FILES ?= gls/glb.vcs.v 
gls/glb_tile.vcs.v gls/glb_tile_sram.v gls/stdcells.v gls/stdcells-prim.v +ifeq ($(WHICH_SOC), amber) + #FIXME: pm cells should be removed! + NETLIST_FILES ?= gls/glb.vcs.v gls/glb_tile.vcs.v gls/glb_tile_sram.v gls/stdcells.v gls/stdcells-pm.v +endif + TB_FILES ?= -F sim/tb_global_buffer.f +# test vectors +# To succeed, amber builds will need to set WHICH_SOC env var to "amber" +# onyx builds don't need to do anything new +ifeq ($(WHICH_SOC), amber) + TESTVECTORS = testvectors_amber +else + TESTVECTORS = testvectors +endif + # ------------------------------------------------------------------- # Commands # ------------------------------------------------------------------- diff --git a/global_buffer/testvectors_amber/gen_glb_test.py b/global_buffer/testvectors_amber/gen_glb_test.py new file mode 100644 index 0000000000..6819cc7544 --- /dev/null +++ b/global_buffer/testvectors_amber/gen_glb_test.py @@ -0,0 +1,56 @@ +import random +import argparse + +def gen_reg_pair(f_reglist, f_regpair): + with open(f_reglist, 'r') as reglist: + num = 0 + for line in reglist: + if line.startswith("0x"): + num += 1 + + with open(f_reglist, 'r') as reglist, open(f_regpair, 'w') as regpair: + regpair.write(f"{num}\n") + for line in reglist: + if not line.startswith("0x"): + continue + word_list = line.split() + addr = word_list[0][2:] + bits = word_list[6] + data = 2**int(bits) - 1 + regpair.write(f"{addr} {data}\n") + +def gen_bs_sample(filename, num): + with open(filename, 'w') as f: + f.write(f"{num}\n") + for i in range(num): + # addr = random.randrange(0, 2**32) + col = random.randrange(0, 32) + reg = random.randrange(0, 2**8) + data = random.randrange(0, 2**32) + f.write(f"{(reg << 8 ) | col} ") + f.write(f"{data}\n") + +def gen_data_sample(filename, width, num): + with open(filename, 'w') as f: + # f.write(f"{num}\n") + for i in range(num): + x = random.randrange(0, 2**width) + f.write(f"{hex(x)[2:]} ") + +if __name__ == "__main__": + parser = 
argparse.ArgumentParser(description='testvector generator') + parser.add_argument('--data', type=str, default=None) + parser.add_argument('--width', type=int, default=16) + parser.add_argument('--num', type=int, default=32) + parser.add_argument('--config', type=str, default=None) + parser.add_argument('--seed', type=int, default=1) + parser.add_argument('--bitstream', type=str, default=None) + parser.add_argument('--bitstream-size', type=int, default=32) + args = parser.parse_args() + random.seed(args.seed) + if args.config: + gen_reg_pair("../systemRDL/output/glb.reglist", args.config) + if args.bitstream: + gen_bs_sample(args.bitstream, args.bitstream_size) + if args.data: + gen_data_sample(args.data, args.width, args.num) diff --git a/global_buffer/testvectors_amber/test01.txt b/global_buffer/testvectors_amber/test01.txt new file mode 100644 index 0000000000..21091df6c8 --- /dev/null +++ b/global_buffer/testvectors_amber/test01.txt @@ -0,0 +1,15 @@ +// WR: tile0/bank29/512, RD: tile0/bank27/512 +2 +00 +WR 0 29 131000 0 +1 +512 +1 +1 +testvectors/512_v0.dat +RD 0 27 131000 0 +1 +512 +1 +1 +testvectors/512_v1.dat diff --git a/global_buffer/testvectors_amber/test02.txt b/global_buffer/testvectors_amber/test02.txt new file mode 100644 index 0000000000..dfafb0c88d --- /dev/null +++ b/global_buffer/testvectors_amber/test02.txt @@ -0,0 +1,15 @@ +// G2F: tile15/bank31/512, F2G: tile0/bank1/512 +2 +00 +G2F 15 31 0 0 +1 +512 +1 +1 +testvectors/512_v0.dat +F2G 0 1 0 0 +1 +512 +1 +1 +testvectors/512_v1.dat diff --git a/global_buffer/testvectors_amber/test03.txt b/global_buffer/testvectors_amber/test03.txt new file mode 100644 index 0000000000..328704e159 --- /dev/null +++ b/global_buffer/testvectors_amber/test03.txt @@ -0,0 +1,27 @@ +// Four transactions simultaneously. Double buffer. 
WR: tile2/bank4, G2F: tile2/bank5, RD: tile3/bank6, F2G: tile3/bank7 +4 +00 +WR 2 4 0 0 +1 +4096 +1 +1 +testvectors/4096_v0.dat +G2F 2 5 0 0 +1 +1024 +1 +1 +testvectors/1024_v0.dat +RD 3 6 0 0 +1 +4096 +1 +1 +testvectors/4096_v1.dat +F2G 3 7 0 0 +1 +1024 +1 +1 +testvectors/1024_v1.dat diff --git a/global_buffer/testvectors_amber/test04.txt b/global_buffer/testvectors_amber/test04.txt new file mode 100644 index 0000000000..e3ae2fd972 --- /dev/null +++ b/global_buffer/testvectors_amber/test04.txt @@ -0,0 +1,15 @@ +// Test tile6 with two dimensions. Same address pattern as resnet. +2 +00 +G2F 6 12 0 0 +1 +4096 +1 +1 +testvectors/4096_v0.dat +F2G 6 13 0 130 +2 +62 62 +1 64 +1 62 +testvectors/3844_v0.dat diff --git a/global_buffer/testvectors_amber/test05.txt b/global_buffer/testvectors_amber/test05.txt new file mode 100644 index 0000000000..2711464187 --- /dev/null +++ b/global_buffer/testvectors_amber/test05.txt @@ -0,0 +1,15 @@ +// Test G2F / F2G. data start addr, cycle start addr are non-zero. +2 +00 +G2F 0 0 2 2 +2 +64 64 +1 65 +1 64 +testvectors/4096_v0.dat +F2G 0 1 2 2 +2 +64 64 +1 65 +1 64 +testvectors/4096_v0.dat diff --git a/global_buffer/testvectors_amber/test06.txt b/global_buffer/testvectors_amber/test06.txt new file mode 100644 index 0000000000..f8998c871a --- /dev/null +++ b/global_buffer/testvectors_amber/test06.txt @@ -0,0 +1,27 @@ +// Test WR and F2G to the same tile, different banks. Test RD and G2F to the same tile, different banks. +4 +00 +WR 0 0 0 0 +1 +4096 +1 +1 +testvectors/4096_v0.dat +F2G 0 1 2 2 +2 +32 32 +1 33 +1 32 +testvectors/1024_v0.dat +G2F 1 2 0 0 +1 +1024 +1 +1 +testvectors/1024_v1.dat +RD 1 3 0 0 +1 +4096 +1 +1 +testvectors/4096_v1.dat diff --git a/global_buffer/testvectors_amber/test07.txt b/global_buffer/testvectors_amber/test07.txt new file mode 100644 index 0000000000..9d737c2144 --- /dev/null +++ b/global_buffer/testvectors_amber/test07.txt @@ -0,0 +1,15 @@ +// Test chaining of tile0 and tile1.
rdrq accesses both bank1 and bank2 +2 +1111111 +G2F 0 1 131000 0 +1 +4096 +1 +1 +testvectors/4096_v0.dat +F2G 1 3 0 130 +2 +62 62 +1 64 +1 62 +testvectors/3844_v0.dat diff --git a/global_buffer/testvectors_amber/test08.txt b/global_buffer/testvectors_amber/test08.txt new file mode 100644 index 0000000000..c6b7c68a1c --- /dev/null +++ b/global_buffer/testvectors_amber/test08.txt @@ -0,0 +1,15 @@ +// Test parallel configuration. Flowing Right. Tile/Bank/StartAddr/CheckTile +2 +0 +PCFG 0 1 0 5 +1 +4096 +1 +1 +testvectors/4096_v0.dat +PCFG 6 12 0 15 +1 +4096 +1 +1 +testvectors/4096_v1.dat diff --git a/global_buffer/testvectors_amber/test09.txt b/global_buffer/testvectors_amber/test09.txt new file mode 100644 index 0000000000..56e39741f9 --- /dev/null +++ b/global_buffer/testvectors_amber/test09.txt @@ -0,0 +1,9 @@ +// Test Sram configuration +1 +00 +SRAM 0 29 0 0 +1 +64 +1 +1 +testvectors/64_v0.dat diff --git a/global_buffer/testvectors_amber/test10.txt b/global_buffer/testvectors_amber/test10.txt new file mode 100644 index 0000000000..74e8133d21 --- /dev/null +++ b/global_buffer/testvectors_amber/test10.txt @@ -0,0 +1,9 @@ +// Test tile 6. sparse writing +1 +00 +F2G 6 12 0 0 +2 +62 62 +10 640 +1 62 +testvectors/3844_v0.dat diff --git a/global_buffer/testvectors_amber/test11.txt b/global_buffer/testvectors_amber/test11.txt new file mode 100644 index 0000000000..94cda2a56a --- /dev/null +++ b/global_buffer/testvectors_amber/test11.txt @@ -0,0 +1,15 @@ +// Test parallel configuration. Flowing left. 
Tile/Bank/StartAddr/CheckTile +2 +100000 +PCFG 5 13 0 0 +1 +4096 +1 +1 +testvectors/4096_v0.dat +PCFG 15 31 0 6 +1 +4096 +1 +1 +testvectors/4096_v1.dat diff --git a/global_buffer/testvectors_amber/test12.txt b/global_buffer/testvectors_amber/test12.txt new file mode 100644 index 0000000000..1df1af47a2 --- /dev/null +++ b/global_buffer/testvectors_amber/test12.txt @@ -0,0 +1,18 @@ +// Ready valid +2 +00 +G2F 0 0 2 2 +RV +2 +64 64 +1 65 +1 64 +testvectors/4096_v0.dat +F2G 0 1 2 2 +RV +5 +1 +3849 +1 +1 +testvectors/3844_v0_b5.dat diff --git a/global_buffer/testvectors_amber/test13.txt b/global_buffer/testvectors_amber/test13.txt new file mode 100644 index 0000000000..04c0a16115 --- /dev/null +++ b/global_buffer/testvectors_amber/test13.txt @@ -0,0 +1,18 @@ +// Ready valid with compressed form (two blocks) +2 +00 +G2F 0 0 2 2 +RV +2 +64 64 +1 65 +1 64 +testvectors/4096_v0.dat +F2G 0 1 2 2 +RVC +2000 +2 +62 62 +1 64 +1 62 +testvectors/3844_v0.dat From df91ffc33d45d86a3fc0aa69e4cff906efc1b21f Mon Sep 17 00:00:00 2001 From: steveri Date: Thu, 15 Dec 2022 10:25:34 -0800 Subject: [PATCH 49/63] gf/tsmc merge of GLB sim files --- global_buffer/Makefile | 12 +- global_buffer/sim_amber/cgra.sv | 215 ++++ global_buffer/sim_amber/dump_fsdb.tcl | 30 + global_buffer/sim_amber/dump_shm.tcl | 17 + global_buffer/sim_amber/glb_test.sv | 1130 ++++++++++++++++++++ global_buffer/sim_amber/kernel.sv | 198 ++++ global_buffer/sim_amber/tb_global_buffer.f | 4 + global_buffer/sim_amber/top.sv | 262 +++++ 8 files changed, 1863 insertions(+), 5 deletions(-) create mode 100644 global_buffer/sim_amber/cgra.sv create mode 100644 global_buffer/sim_amber/dump_fsdb.tcl create mode 100644 global_buffer/sim_amber/dump_shm.tcl create mode 100644 global_buffer/sim_amber/glb_test.sv create mode 100644 global_buffer/sim_amber/kernel.sv create mode 100644 global_buffer/sim_amber/tb_global_buffer.f create mode 100644 global_buffer/sim_amber/top.sv diff --git a/global_buffer/Makefile b/global_buffer/Makefile 
index a97e621071..4994ac3282 100644 --- a/global_buffer/Makefile +++ b/global_buffer/Makefile @@ -50,17 +50,19 @@ ifeq ($(WHICH_SOC), amber) NETLIST_FILES ?= gls/glb.vcs.v gls/glb_tile.vcs.v gls/glb_tile_sram.v gls/stdcells.v gls/stdcells-pm.v endif -TB_FILES ?= -F sim/tb_global_buffer.f - # test vectors # To succeed, amber builds will need to set WHICH_SOC env var to "amber" # onyx builds don't need to do anything new +TESTVECTORS = testvectors +SIM = sim ifeq ($(WHICH_SOC), amber) TESTVECTORS = testvectors_amber + SIM = sim_amber else - TESTVECTORS = testvectors endif +TB_FILES ?= -F $(SIM)/tb_global_buffer.f + # ------------------------------------------------------------------- # Commands # ------------------------------------------------------------------- @@ -153,9 +155,9 @@ else endif ifeq ($(TOOL), XCELIUM) - DUMP_ARGS = -input sim/dump_shm.tcl + DUMP_ARGS = -input $(SIM)/dump_shm.tcl else ifeq ($(TOOL), VCS) - DUMP_ARGS = -ucli -i sim/dump_fsdb.tcl + DUMP_ARGS = -ucli -i $(SIM)/dump_fsdb.tcl endif ifneq ($(SDF), 0) diff --git a/global_buffer/sim_amber/cgra.sv b/global_buffer/sim_amber/cgra.sv new file mode 100644 index 0000000000..b368f768a4 --- /dev/null +++ b/global_buffer/sim_amber/cgra.sv @@ -0,0 +1,215 @@ +module cgra ( + input logic clk, + input logic reset, + input logic [NUM_PRR-1:0] stall, + input logic [NUM_PRR-1:0] cfg_wr_en, + input logic [NUM_PRR-1:0][CGRA_CFG_ADDR_WIDTH-1:0] cfg_wr_addr, + input logic [NUM_PRR-1:0][CGRA_CFG_DATA_WIDTH-1:0] cfg_wr_data, + input logic [NUM_PRR-1:0] cfg_rd_en, + input logic [NUM_PRR-1:0][CGRA_CFG_ADDR_WIDTH-1:0] cfg_rd_addr, + output logic [NUM_PRR-1:0][CGRA_CFG_DATA_WIDTH-1:0] cfg_rd_data, + input logic [NUM_PRR-1:0] io1_g2io, + input logic [NUM_PRR-1:0][ 15:0] io16_g2io, + output logic [NUM_PRR-1:0] io1_io2g, + output logic [NUM_PRR-1:0][ 15:0] io16_io2g +); + localparam int PRR_CFG_REG_DEPTH = 16; + + // --------------------------------------- + // Configuration + // --------------------------------------- + 
logic [CGRA_CFG_DATA_WIDTH-1:0] cfg_reg[NUM_PRR][PRR_CFG_REG_DEPTH]; + localparam int CGRA_CFG_PRR_WIDTH = $clog2(PRR_CFG_REG_DEPTH); + + always_ff @(posedge clk or posedge reset) begin + if (reset) begin + for (int i = 0; i < NUM_PRR; i++) begin + for (int j = 0; j < PRR_CFG_REG_DEPTH; j++) begin + cfg_reg[i][j] <= 0; + end + end + end else begin + for (int i = 0; i < NUM_PRR; i++) begin + if (cfg_wr_en[i]) begin + if (cfg_wr_addr[i][CGRA_CFG_ADDR_WIDTH-1-:NUM_PRR_WIDTH] == i) begin + cfg_reg[i][cfg_wr_addr[i][CGRA_CFG_PRR_WIDTH-1:0]] <= cfg_wr_data[i]; + end + end + end + end + end + + function automatic bit [CGRA_CFG_DATA_WIDTH-1:0] cfg_read( + bit [CGRA_CFG_ADDR_WIDTH-1:0] addr); + int prr_id = addr[CGRA_CFG_ADDR_WIDTH-1-:NUM_PRR_WIDTH]; + cfg_read = cfg_reg[prr_id][addr[CGRA_CFG_PRR_WIDTH-1:0]]; + endfunction + + always_comb begin + for (int i = 0; i < NUM_PRR; i++) begin + if (cfg_rd_en[i] && cfg_rd_addr[i][CGRA_CFG_ADDR_WIDTH-1-:NUM_PRR_WIDTH] == i) begin + cfg_rd_data[i] = cfg_reg[i][cfg_rd_addr[i][CGRA_CFG_PRR_WIDTH-1:0]]; + end else begin + cfg_rd_data[i] = '0; + end + end + end + + // --------------------------------------- + // Control + // --------------------------------------- + bit [NUM_PRR-1:0] is_glb2prr_on; + bit [NUM_PRR-1:0] is_prr2glb_on; + bit [NUM_PRR-1:0] is_prr2glb_done; + + bit [NUM_PRR-1:0][99:0] prr2glb_cnt; + bit [NUM_PRR-1:0] prr2glb_valid; + + bit [15:0] glb2prr_q[int][$]; + bit [15:0] prr2glb_q[int][$]; + bit [99:0] prr2glb_valid_cnt_q[int][$]; + + bit flush; + + // --------------------------------------- + // Data Queue + // --------------------------------------- + + always_ff @(posedge clk or posedge reset) begin + if (reset) begin + for (int i = 0; i < NUM_PRR; i++) begin + glb2prr_q[i] = {}; + end + end else begin + for (int i = 0; i < NUM_PRR; i++) begin + if (flush) begin + glb2prr_q[i] = {}; + end else if (!stall[i]) begin + if (is_glb2prr_on[i] == 1) begin + if (io1_g2io[i] == 1) begin + 
glb2prr_q[i].push_back(io16_g2io[i]); + end + end + end + end + end + end + + always @(posedge clk or posedge reset) begin + if (reset) begin + for (int i = 0; i < NUM_PRR; i++) begin + prr2glb_q[i] = {}; + io1_io2g[i] <= 0; + io16_io2g[i] <= 0; + end + end else begin + for (int i = 0; i < NUM_PRR; i++) begin + if (flush) begin + prr2glb_q[i] = {}; + end else if (!stall[i]) begin + if (prr2glb_valid[i] == 1 && (prr2glb_q[i].size() > 0)) begin + io1_io2g[i] <= 1; + io16_io2g[i] <= prr2glb_q[i].pop_front(); + end else begin + io1_io2g[i] <= 0; + io16_io2g[i] <= 0; + end + end + end + end + end + + always_ff @(posedge clk or posedge reset) begin + if (reset) begin + prr2glb_valid <= 0; + prr2glb_cnt <= 0; + is_prr2glb_done <= '0; + end else begin + for (int i = 0; i < NUM_PRR; i++) begin + if (flush) begin + prr2glb_cnt <= 0; + is_prr2glb_done <= '0; + prr2glb_valid <= 0; + end else if (!stall[i]) begin + if ((is_prr2glb_on[i] == 1) && (is_prr2glb_done[i] == 0)) begin + if (prr2glb_cnt[i] == prr2glb_valid_cnt_q[i][0]) begin + prr2glb_valid[i] <= 1; + void'(iterate_valid_cnt(i)); + end else begin + prr2glb_valid[i] <= 0; + end + prr2glb_cnt[i] <= prr2glb_cnt[i] + 1; + end else begin + prr2glb_valid[i] <= 0; + end + end + end + end + end + + initial begin + foreach (prr2glb_valid_cnt_q[i]) begin + prr2glb_valid_cnt_q[i] = {}; + end + end + + function prr2glb_configure(int prr_id, int dim, int extent[LOOP_LEVEL], + int cycle_stride[LOOP_LEVEL]); + bit [99:0] cnt; + bit done; + int i_extent[LOOP_LEVEL]; + prr2glb_valid_cnt_q[prr_id] = {}; + done = 0; + i_extent = '{LOOP_LEVEL{0}}; + while (1) begin + cnt = 0; + for (int i = 0; i < dim; i++) begin + cnt += i_extent[i] * cycle_stride[i]; + end + prr2glb_valid_cnt_q[prr_id].push_back(cnt); + for (int i = 0; i < dim; i++) begin + i_extent[i] += 1; + if (i_extent[i] == extent[i]) begin + i_extent[i] = 0; + if (i == dim - 1) done = 1; + end else begin + break; + end + end + if (done == 1) break; + end + endfunction + + function 
glb2prr_on(int prr_id); + is_glb2prr_on[prr_id] = '1; + endfunction + + function glb2prr_off(int prr_id); + is_glb2prr_on[prr_id] = '0; + endfunction + + function prr2glb_on(int prr_id); + is_prr2glb_on[prr_id] = '1; + endfunction + + function prr2glb_off(int prr_id); + is_prr2glb_on[prr_id] = '0; + endfunction + + function iterate_valid_cnt(int prr_id); + void'(prr2glb_valid_cnt_q[prr_id].pop_front()); + if (prr2glb_valid_cnt_q[prr_id].size() == 0) begin + is_prr2glb_done[prr_id] = 1; + // void'(prr2glb_off(prr_id)); + end + endfunction + + function flush_on(); + flush = 1; + endfunction + + function flush_off(); + flush = 0; + endfunction + +endmodule diff --git a/global_buffer/sim_amber/dump_fsdb.tcl b/global_buffer/sim_amber/dump_fsdb.tcl new file mode 100644 index 0000000000..02db7cee2c --- /dev/null +++ b/global_buffer/sim_amber/dump_fsdb.tcl @@ -0,0 +1,30 @@ +if { $::env(WAVEFORM) == "0" && $::env(SAIF) == "0" } { + run + exit +} else { + stop -change top.test.test_toggle + run + + if { $::env(WAVEFORM) != "0" } { + dump -file global_buffer.fsdb -type FSDB + dump -add top -fsdb_opt +mda+packedmda+struct + } + if { $::env(SAIF) != "0" } { + power -gate_level on mda sv + power top.dut + power -enable + } + + run + + if { $::env(SAIF) != "0" } { + power -disable + power -report run.saif 1e-15 top.dut + } + if { $::env(WAVEFORM) != "0" } { + dump -close + } + + run + exit +} diff --git a/global_buffer/sim_amber/dump_shm.tcl b/global_buffer/sim_amber/dump_shm.tcl new file mode 100644 index 0000000000..5c2f8982ec --- /dev/null +++ b/global_buffer/sim_amber/dump_shm.tcl @@ -0,0 +1,17 @@ +if { $::env(WAVEFORM) != "0" } { + database -open global_buffer -shm + probe -create top -depth all -all -memories -functions -tasks -shm -database global_buffer +} + +if { $::env(SAIF) == "0" } { + run + exit +} else { + stop -name test_toggle -object top.test.test_toggle + run + dumpsaif -ewg -scope top.dut -hierarchy -internal -output run.saif -overwrite + run + dumpsaif -end + 
run + exit +} diff --git a/global_buffer/sim_amber/glb_test.sv b/global_buffer/sim_amber/glb_test.sv new file mode 100644 index 0000000000..d9dfc4a667 --- /dev/null +++ b/global_buffer/sim_amber/glb_test.sv @@ -0,0 +1,1130 @@ +/*============================================================================= +** Module: glb_test.sv +** Description: +** simple top testbench for glb +** Author: Taeyoung Kong +** Change history: 05/22/2021 - Implement first version of testbench +**===========================================================================*/ +program glb_test ( + // LEFT + input logic clk, + input logic reset, + output logic [NUM_GLB_TILES-1:0] glb_clk_en_master, + output logic [NUM_GLB_TILES-1:0] glb_clk_en_bank_master, + output logic [NUM_GLB_TILES-1:0] pcfg_broadcast_stall, + output logic [NUM_GROUPS-1:0][$clog2(NUM_GLB_TILES)-1:0] flush_crossbar_sel, + + // proc + output logic proc_wr_en, + output logic [BANK_DATA_WIDTH/8-1:0] proc_wr_strb, + output logic [ GLB_ADDR_WIDTH-1:0] proc_wr_addr, + output logic [ BANK_DATA_WIDTH-1:0] proc_wr_data, + output logic proc_rd_en, + output logic [ GLB_ADDR_WIDTH-1:0] proc_rd_addr, + input logic [ BANK_DATA_WIDTH-1:0] proc_rd_data, + input logic proc_rd_data_valid, + + // configuration of glb from glc + output logic if_cfg_wr_en, + output logic if_cfg_wr_clk_en, + output logic [AXI_ADDR_WIDTH-1:0] if_cfg_wr_addr, + output logic [AXI_DATA_WIDTH-1:0] if_cfg_wr_data, + output logic if_cfg_rd_en, + output logic if_cfg_rd_clk_en, + output logic [AXI_ADDR_WIDTH-1:0] if_cfg_rd_addr, + input logic [AXI_DATA_WIDTH-1:0] if_cfg_rd_data, + input logic if_cfg_rd_data_valid, + + // configuration of sram from glc + output logic if_sram_cfg_wr_en, + output logic [GLB_ADDR_WIDTH-1:0] if_sram_cfg_wr_addr, + output logic [AXI_DATA_WIDTH-1:0] if_sram_cfg_wr_data, + output logic if_sram_cfg_rd_en, + output logic [GLB_ADDR_WIDTH-1:0] if_sram_cfg_rd_addr, + input logic [AXI_DATA_WIDTH-1:0] if_sram_cfg_rd_data, + input logic 
if_sram_cfg_rd_data_valid, + + // cgra configuration from global controller + output logic cgra_cfg_jtag_gc2glb_wr_en, + output logic cgra_cfg_jtag_gc2glb_rd_en, + output logic [CGRA_CFG_ADDR_WIDTH-1:0] cgra_cfg_jtag_gc2glb_addr, + output logic [CGRA_CFG_DATA_WIDTH-1:0] cgra_cfg_jtag_gc2glb_data, + + // control pulse + output logic [NUM_GLB_TILES-1:0] strm_g2f_start_pulse, + output logic [NUM_GLB_TILES-1:0] strm_f2g_start_pulse, + output logic [NUM_GLB_TILES-1:0] pcfg_start_pulse, + input logic [NUM_GLB_TILES-1:0] strm_g2f_interrupt_pulse, + input logic [NUM_GLB_TILES-1:0] strm_f2g_interrupt_pulse, + input logic [NUM_GLB_TILES-1:0] pcfg_g2f_interrupt_pulse, + + // cgra configuration to cgra + input logic [NUM_GLB_TILES-1:0][CGRA_PER_GLB-1:0] cgra_cfg_g2f_cfg_wr_en, + input logic [NUM_GLB_TILES-1:0][CGRA_PER_GLB-1:0] cgra_cfg_g2f_cfg_rd_en, + input logic [NUM_GLB_TILES-1:0][CGRA_PER_GLB-1:0][CGRA_CFG_ADDR_WIDTH-1:0] cgra_cfg_g2f_cfg_addr, + input logic [NUM_GLB_TILES-1:0][CGRA_PER_GLB-1:0][CGRA_CFG_DATA_WIDTH-1:0] cgra_cfg_g2f_cfg_data +); + timeunit 100ps; timeprecision 1ps; + + const int MAX_NUM_ERRORS = 20; + int test_toggle = 0; + int tile_id = 0; + + bit [NUM_GLB_TILES-1:0] tile_id_mask = 0; + + task automatic init_test(); + // Initialize global buffer configuration registers + $root.top.assert_reset(); + + for (int i = 0; i < NUM_GLB_TILES; i++) begin + void'($root.top.cgra.glb2prr_off(i)); + void'($root.top.cgra.prr2glb_off(i)); + end + void'($root.top.cgra.flush_on()); + @(posedge clk); + void'($root.top.cgra.flush_off()); + @(posedge clk); + endtask + + task automatic run_test(Test test); + int err = 0; + int i_addr = 0; + int i_extent[LOOP_LEVEL]; + int latency; + int data_cnt = 0; + bit done = 0; + int num_chained_prev = 0; + int num_chained_next = 0; + + Kernel kernels[] = test.kernels; + + if (test.data_network_mask != 0) begin + foreach (kernels[i]) begin + num_chained_prev = 0; + num_chained_next = 0; + for (int j = kernels[i].tile_id; j <= 
NUM_GLB_TILES; j++) begin + if (test.data_network_mask[j] == 1) begin + num_chained_next++; + end else begin + break; + end + end + for (int j = kernels[i].tile_id - 1; j >= 0; j--) begin + if (test.data_network_mask[j] == 1) begin + num_chained_prev++; + end else begin + break; + end + end + for ( + int j = kernels[i].tile_id - num_chained_prev; + j < kernels[i].tile_id + num_chained_next; + j++ + ) begin + if (kernels[i].type_ == PCFG) begin + pcfg_network_connect(j, 1); + end else begin + data_network_connect(j, 1); + end + end + if (kernels[i].type_ == PCFG) begin + pcfg_network_latency(kernels[i].tile_id, + (num_chained_prev + num_chained_next) * 2 + 3); + end else begin + data_network_latency(kernels[i].tile_id, + (num_chained_prev + num_chained_next) * 2 + 3); + end + end + end + glb_pcfg_broadcast_stall(test.pcfg_broadcast_stall_mask); + foreach (kernels[i]) begin + if (kernels[i].type_ == PCFG) begin + glb_pcfg_broadcast_mux(test.pcfg_broadcast_mux_value); + break; + end + end + + foreach (kernels[i]) begin + if (kernels[i].type_ == WR) begin + // WR/RD only has one loop level + kernels[i].data_arr = new[kernels[i].extent[0]]; + kernels[i].data_arr_out = new[kernels[i].extent[0]]; + $readmemh(kernels[i].filename, kernels[i].data_arr); + // Since data is in 16bit word, we have to convert it to 64bit data array + kernels[i].data64_arr = convert_16b_to_64b(kernels[i].data_arr); + kernels[i].data64_arr_out = new[kernels[i].data64_arr.size()]; + end else if (kernels[i].type_ == RD) begin + kernels[i].data_arr = new[kernels[i].extent[0]]; + kernels[i].data_arr_out = new[kernels[i].extent[0]]; + $readmemh(kernels[i].filename, kernels[i].data_arr); + // Since data is in 16bit word, we have to convert it to 64bit data array + kernels[i].data64_arr = convert_16b_to_64b(kernels[i].data_arr); + kernels[i].data64_arr_out = new[kernels[i].data64_arr.size()]; + // Note: In order to test RD, we have to load data to SRAM first. 
+ // Since our hardware generator does not support ifdef or inline verilog, we have to run task + // to write data to memory + proc_write_burst(kernels[i].start_addr, kernels[i].data64_arr); + end else if (kernels[i].type_ == PCFG) begin + kernels[i].data_arr = new[kernels[i].extent[0]]; + kernels[i].data_arr_out = new[kernels[i].extent[0]]; + $readmemh(kernels[i].filename, kernels[i].data_arr); + // Since data is in 16bit word, we have to convert it to 64bit data array + kernels[i].data64_arr = convert_16b_to_64b(kernels[i].data_arr); + kernels[i].data64_arr_out = new[kernels[i].data64_arr.size()]; + // Note: In order to test PCFG, we have to load data to SRAM first. + // Since our hardware generator does not support ifdef or inline verilog, we have to run task + // to write data to memory + proc_write_burst(kernels[i].start_addr, kernels[i].data64_arr); + pcfg_dma_configure(kernels[i].tile_id, 1, kernels[i].start_addr, + kernels[i].extent[0] / 4); + end else if (kernels[i].type_ == G2F) begin + data_cnt = 1; + for (int j = 0; j < kernels[i].dim; j++) begin + data_cnt += (kernels[i].extent[j] - 1) * kernels[i].data_stride[j]; + end + kernels[i].mem = new[data_cnt]; + $readmemh(kernels[i].filename, kernels[i].mem); + + data_cnt = 1; + for (int j = 0; j < kernels[i].dim; j++) begin + data_cnt *= kernels[i].extent[j]; + end + kernels[i].data_arr = new[data_cnt]; + kernels[i].data_arr_out = new[data_cnt]; + i_addr = kernels[i].start_addr; + i_extent = '{LOOP_LEVEL{0}}; + done = 0; + data_cnt = 0; + // Note: Again, we cannot call function to write data to memory, we have to run task + // to write data to memory. Use partial write function to do that.
+ #2 + while (1) begin + i_addr = kernels[i].start_addr; + for (int j = 0; j < kernels[i].dim; j++) begin + i_addr += kernels[i].data_stride[j] * i_extent[j] * 2; // Convert 16bit-word address to byte address + end + // Update internal counter + for (int j = 0; j < kernels[i].dim; j++) begin + i_extent[j] += 1; + if (i_extent[j] == kernels[i].extent[j]) begin + i_extent[j] = 0; + if (j == kernels[i].dim - 1) done = 1; + end else begin + break; + end + end + proc_write_partial(i_addr, + kernels[i].mem[(i_addr-kernels[i].start_addr)/2]); + kernels[i].data_arr[ + data_cnt++ + ] = kernels[i].mem[(i_addr-kernels[i].start_addr)/2]; + if (done == 1) break; + end + // Configure LD DMA + g2f_dma_configure(kernels[i].tile_id, 1, kernels[i].start_addr, + kernels[i].cycle_start_addr, kernels[i].dim, + kernels[i].new_extent, kernels[i].new_cycle_stride, + kernels[i].new_data_stride); + end else if (kernels[i].type_ == F2G) begin + data_cnt = 1; + for (int j = 0; j < kernels[i].dim; j++) begin + data_cnt *= kernels[i].extent[j]; + end + kernels[i].data_arr = new[data_cnt]; + kernels[i].data_arr_out = new[data_cnt]; + $readmemh(kernels[i].filename, kernels[i].data_arr); + kernels[i].data64_arr = convert_16b_to_64b(kernels[i].data_arr); + kernels[i].data64_arr_out = new[kernels[i].data64_arr.size()]; + + // Store the data to PRR queue. + write_prr(kernels[i].tile_id, kernels[i].data_arr); + // Configure PRR controller to follow cycle stride/extent pattern. 
+ void'($root.top.cgra.prr2glb_configure( + kernels[i].tile_id, kernels[i].dim, kernels[i].extent, kernels[i].cycle_stride + )); + // Configure ST DMA + f2g_dma_configure(kernels[i].tile_id, 1, kernels[i].start_addr, + kernels[i].cycle_start_addr, kernels[i].dim, + kernels[i].new_extent, kernels[i].new_cycle_stride, + kernels[i].new_data_stride); + end else if (kernels[i].type_ == SRAM) begin + kernels[i].data_arr = new[kernels[i].extent[0]]; + kernels[i].data_arr_out = new[kernels[i].extent[0]]; + $readmemh(kernels[i].filename, kernels[i].data_arr); + // Since data is in 16bit word, we have to convert it to 64bit data array + kernels[i].data64_arr = convert_16b_to_64b(kernels[i].data_arr); + kernels[i].data64_arr_out = new[kernels[i].data64_arr.size()]; + end + end + + repeat (50) @(posedge clk); + + $display("\n---- Test Run ----"); + // start + test_toggle = 1; + fork + if (test.g2f_tile_mask != 0) g2f_start(test.g2f_tile_mask); + if (test.f2g_tile_mask != 0) f2g_start(test.f2g_tile_mask); + if (test.pcfg_tile_mask != 0) pcfg_start(test.pcfg_tile_mask); + begin + foreach (kernels[i]) begin + automatic int j = i; + fork + if (kernels[j].type_ == WR) begin + proc_write_burst(kernels[j].start_addr, convert_16b_to_64b( + kernels[j].data_arr)); + end else if (kernels[j].type_ == RD) begin + proc_read_burst(kernels[j].start_addr, kernels[j].data64_arr_out); + end else if (kernels[j].type_ == G2F) begin + g2f_run(kernels[j].tile_id, kernels[j].total_cycle); + end else if (kernels[j].type_ == F2G) begin + f2g_run(kernels[j].tile_id, kernels[j].total_cycle); + end else if (kernels[j].type_ == PCFG) begin + pcfg_run(kernels[j].tile_id, kernels[j].check_tile_id, kernels[j].total_cycle, + kernels[j].data64_arr_out); + end else if (kernels[j].type_ == SRAM) begin + sram_write_burst(kernels[j].bank_id, kernels[j].data64_arr); + sram_read_burst(kernels[j].bank_id, kernels[j].data64_arr_out); + end + join_none + end + wait fork; + end + join_none + wait fork; + // end + 
test_toggle = 0; + + repeat (50) @(posedge clk); + + // compare + $display("\n---- Test Result ----"); + foreach (kernels[i]) begin + if (kernels[i].type_ == WR) begin + proc_read_burst(kernels[i].start_addr, kernels[i].data64_arr_out); + $display("WR Comparison"); + err += compare_64b_arr(kernels[i].data64_arr, kernels[i].data64_arr_out); + end else if (kernels[i].type_ == RD) begin + $display("RD Comparison"); + err += compare_64b_arr(kernels[i].data64_arr, kernels[i].data64_arr_out); + end else if (kernels[i].type_ == G2F) begin + // FIXME: Only works with 'use_valid' set in LD_DMA CTRL + read_prr(kernels[i].tile_id, kernels[i].data_arr_out); + $display("G2F Comparison"); + err += compare_16b_arr(kernels[i].data_arr, kernels[i].data_arr_out); + end else if (kernels[i].type_ == F2G) begin + proc_read_burst(kernels[i].start_addr, kernels[i].data64_arr_out); + $display("F2G Comparison"); + err += compare_64b_arr(kernels[i].data64_arr, kernels[i].data64_arr_out); + end else if (kernels[i].type_ == PCFG) begin + $display("PCFG Comparison"); + err += compare_64b_arr(kernels[i].data64_arr, kernels[i].data64_arr_out); + end else if (kernels[i].type_ == SRAM) begin + $display("SRAM Comparison"); + err += compare_64b_arr(kernels[i].data64_arr, kernels[i].data64_arr_out); + end + repeat (30) @(posedge clk); + end + + if (err == 0) begin + $display("Test passed!"); + end else begin + $error("Test failed!"); + end + + endtask + + initial begin + Test test; + string test_filename; + string test_name; + int max_num_test; + initialize(); + if (!($value$plusargs("MAX_NUM_TEST=%d", max_num_test))) max_num_test = 10; + for (int i = 1; i <= max_num_test; i++) begin + $sformat(test_name, "test%02d", i); + if (($test$plusargs(test_name))) begin + $display("\n************** Test Start *****************"); + $sformat(test_filename, "./testvectors/%s.txt", test_name); + test = new(test_filename); + init_test(); + run_test(test); + $display("************** Test End *****************\n"); 
+ end + end + end + + task initialize(); + // control + glb_clk_en_master <= 0; + glb_clk_en_bank_master <= 0; + pcfg_broadcast_stall <= 0; + pcfg_start_pulse <= 0; + strm_g2f_start_pulse <= 0; + strm_f2g_start_pulse <= 0; + flush_crossbar_sel <= 0; + + // proc + proc_wr_en <= 0; + proc_wr_strb <= 0; + proc_wr_addr <= 0; + proc_wr_data <= 0; + proc_rd_en <= 0; + proc_rd_addr <= 0; + + // cfg ifc + if_cfg_wr_en <= 0; + if_cfg_wr_clk_en <= 0; + if_cfg_wr_addr <= 0; + if_cfg_wr_data <= 0; + if_cfg_rd_en <= 0; + if_cfg_rd_clk_en <= 0; + if_cfg_rd_addr <= 0; + + // sram ifc + if_sram_cfg_wr_en <= 0; + if_sram_cfg_wr_addr <= 0; + if_sram_cfg_wr_data <= 0; + if_sram_cfg_rd_en <= 0; + if_sram_cfg_rd_addr <= 0; + + // jtag + cgra_cfg_jtag_gc2glb_wr_en <= 0; + cgra_cfg_jtag_gc2glb_rd_en <= 0; + cgra_cfg_jtag_gc2glb_addr <= 0; + cgra_cfg_jtag_gc2glb_data <= 0; + + // wait for reset clear + wait (reset == 0); + repeat (10) @(posedge clk); + endtask + + task glb_flush_crossbar_ctrl(logic [NUM_GROUPS-1:0][$clog2(NUM_GLB_TILES)-1:0] mask); + #2 $display("Glb flush signal crossbar configuration %8h", mask); + flush_crossbar_sel <= mask; + repeat (4) @(posedge clk); + + endtask + + task glb_clk_en_master_ctrl(logic [NUM_GLB_TILES-1:0] mask); + #2 $display("Glb tiles master clk enable with mask %16b", mask); + glb_clk_en_master <= mask; + repeat (4) @(posedge clk); + + endtask + + task glb_clk_en_bank_master_ctrl(logic [NUM_GLB_TILES-1:0] mask); + #2 $display("Glb tiles master bank clk enable with mask %16b", mask); + glb_clk_en_bank_master <= mask; + repeat (4) @(posedge clk); + + endtask + + task glb_pcfg_broadcast_stall(logic [NUM_GLB_TILES-1:0] mask); + #2 $display("Glb tiles PCFG broadcast logics are stalled with mask %16b", mask); + pcfg_broadcast_stall <= mask; + repeat (4) @(posedge clk); + + endtask + + task glb_pcfg_broadcast_mux( + logic [`GLB_PCFG_BROADCAST_MUX_R_MSB:0] mux_value[NUM_GLB_TILES-1:0]); + $display("Set pcfg broadcast muxes"); + for (int i = 0; i < 
NUM_GLB_TILES; i++) begin + $display("Tile ID: %0d, South Mux: %2b, West Mux: %2b, East Mux: %2b", i, + mux_value[i][`GLB_PCFG_BROADCAST_MUX_SOUTH_F_MSB : `GLB_PCFG_BROADCAST_MUX_SOUTH_F_LSB], + mux_value[i][`GLB_PCFG_BROADCAST_MUX_WEST_F_MSB : `GLB_PCFG_BROADCAST_MUX_WEST_F_LSB], + mux_value[i][`GLB_PCFG_BROADCAST_MUX_EAST_F_MSB : `GLB_PCFG_BROADCAST_MUX_EAST_F_LSB]); + glb_cfg_write( + (i << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + `GLB_PCFG_BROADCAST_MUX_R, + mux_value[i]); + glb_cfg_read( + (i << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + `GLB_PCFG_BROADCAST_MUX_R, + mux_value[i]); + end + repeat (4) @(posedge clk); + + endtask + + task automatic sram_write_burst(int bank_id, ref data64 data); + int start_addr = (1 << BANK_ADDR_WIDTH) * bank_id; + int write_data; + for (int i = 0; i < data.size(); i++) begin + for (int j = 0; j < 2; j++) begin + write_data = data[i][32*j+:32]; + sram_write(start_addr + (i << BANK_BYTE_OFFSET) + (j << (BANK_BYTE_OFFSET - 1)), + write_data); + end + end + endtask + + task automatic sram_write(input bit [GLB_ADDR_WIDTH-1:0] addr, int data); + #2 if_sram_cfg_wr_en <= 1; + if_sram_cfg_wr_addr <= addr; + if_sram_cfg_wr_data <= data; + repeat (4) @(posedge clk); + #2 if_sram_cfg_wr_en <= 0; + if_sram_cfg_wr_addr <= 0; + if_sram_cfg_wr_data <= 0; + repeat (2) @(posedge clk); + endtask + + task automatic sram_read_burst(int bank_id, ref data64 data); + int start_addr = (1 << BANK_ADDR_WIDTH) * bank_id; + int read_data; + for (int i = 0; i < data.size(); i++) begin + for (int j = 0; j < 2; j++) begin + sram_read(start_addr + (i << BANK_BYTE_OFFSET) + (j << (BANK_BYTE_OFFSET - 1)), + read_data); + data[i][32*j+:32] = read_data; + end + end + endtask + + task automatic sram_read(input bit [GLB_ADDR_WIDTH-1:0] addr, ref int data); + int read_delay = 40; + fork + begin + @(posedge clk); + #2 if_sram_cfg_rd_en <= 1; + if_sram_cfg_rd_addr <= addr; + repeat (4) @(posedge clk); + #2 if_sram_cfg_rd_en <= 0; + if_sram_cfg_rd_addr <= 0; + end + 
begin + for (int i = 0; i < read_delay; i++) begin + if (if_sram_cfg_rd_data_valid == 1) begin + data = if_sram_cfg_rd_data; + break; + end + @(posedge clk); + end + end + join + repeat (20) @(posedge clk); + endtask + + task automatic data_network_connect(input int tile_id, bit is_connected); + glb_cfg_write( + (tile_id << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + `GLB_DATA_NETWORK_CTRL_R, + (is_connected << `GLB_DATA_NETWORK_CTRL_CONNECTED_F_LSB)); + endtask + + task automatic data_network_latency(input int tile_id, [LATENCY_WIDTH-1:0] latency); + glb_cfg_write( + (tile_id << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + `GLB_DATA_NETWORK_LATENCY_R, + (latency << `GLB_DATA_NETWORK_LATENCY_VALUE_F_LSB)); + endtask + + task automatic pcfg_network_connect(input int tile_id, bit is_connected); + glb_cfg_write( + (tile_id << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + `GLB_PCFG_NETWORK_CTRL_R, + (is_connected << `GLB_PCFG_NETWORK_CTRL_CONNECTED_F_LSB)); + endtask + + task automatic pcfg_network_latency(input int tile_id, [LATENCY_WIDTH-1:0] latency); + glb_cfg_write( + (tile_id << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + `GLB_PCFG_NETWORK_LATENCY_R, + (latency << `GLB_PCFG_NETWORK_LATENCY_VALUE_F_LSB)); + endtask + + task automatic pcfg_dma_configure(input int tile_id, bit on, [AXI_DATA_WIDTH-1:0] start_addr, + [AXI_DATA_WIDTH-1:0] num_word); + glb_cfg_write((tile_id << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + `GLB_PCFG_DMA_CTRL_R, + (on << `GLB_PCFG_DMA_CTRL_MODE_F_LSB)); + glb_cfg_read((tile_id << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + `GLB_PCFG_DMA_CTRL_R, + (on << `GLB_PCFG_DMA_CTRL_MODE_F_LSB)); + glb_cfg_write( + (tile_id << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + `GLB_PCFG_DMA_HEADER_START_ADDR_R, + (start_addr << `GLB_PCFG_DMA_HEADER_START_ADDR_START_ADDR_F_LSB)); + glb_cfg_read( + (tile_id << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + `GLB_PCFG_DMA_HEADER_START_ADDR_R, + (start_addr << `GLB_PCFG_DMA_HEADER_START_ADDR_START_ADDR_F_LSB)); + glb_cfg_write( + 
(tile_id << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + `GLB_PCFG_DMA_HEADER_NUM_CFG_R, + (num_word << `GLB_PCFG_DMA_HEADER_NUM_CFG_NUM_CFG_F_LSB)); + glb_cfg_read( + (tile_id << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + `GLB_PCFG_DMA_HEADER_NUM_CFG_R, + (num_word << `GLB_PCFG_DMA_HEADER_NUM_CFG_NUM_CFG_F_LSB)); + endtask + + task automatic g2f_dma_configure(input int tile_id, bit on, [AXI_DATA_WIDTH-1:0] start_addr, + [AXI_DATA_WIDTH-1:0] cycle_start_addr, int dim, + int extent[LOOP_LEVEL], int cycle_stride[LOOP_LEVEL], + int data_stride[LOOP_LEVEL]); + glb_cfg_write((tile_id << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + `GLB_LD_DMA_CTRL_R, + ((2'b01 << `GLB_LD_DMA_CTRL_DATA_MUX_F_LSB) + | (on << `GLB_LD_DMA_CTRL_MODE_F_LSB) + | (1 << `GLB_LD_DMA_CTRL_USE_VALID_F_LSB))); + glb_cfg_read((tile_id << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + `GLB_LD_DMA_CTRL_R, + ((2'b01 << `GLB_LD_DMA_CTRL_DATA_MUX_F_LSB) + | (on << `GLB_LD_DMA_CTRL_MODE_F_LSB) + | (1 << `GLB_LD_DMA_CTRL_USE_VALID_F_LSB))); + glb_cfg_write( + (tile_id << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + `GLB_LD_DMA_HEADER_0_START_ADDR_R, + (start_addr << `GLB_LD_DMA_HEADER_0_START_ADDR_START_ADDR_F_LSB)); + glb_cfg_read( + (tile_id << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + `GLB_LD_DMA_HEADER_0_START_ADDR_R, + (start_addr << `GLB_LD_DMA_HEADER_0_START_ADDR_START_ADDR_F_LSB)); + glb_cfg_write( + (tile_id << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + `GLB_LD_DMA_HEADER_0_CYCLE_START_ADDR_R, + (cycle_start_addr << `GLB_LD_DMA_HEADER_0_CYCLE_START_ADDR_CYCLE_START_ADDR_F_LSB)); + glb_cfg_read( + (tile_id << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + `GLB_LD_DMA_HEADER_0_CYCLE_START_ADDR_R, + (cycle_start_addr << `GLB_LD_DMA_HEADER_0_CYCLE_START_ADDR_CYCLE_START_ADDR_F_LSB)); + glb_cfg_write( + (tile_id << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + `GLB_LD_DMA_HEADER_0_DIM_R, + (dim << `GLB_LD_DMA_HEADER_0_DIM_DIM_F_LSB)); + glb_cfg_read( + (tile_id << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + 
`GLB_LD_DMA_HEADER_0_DIM_R, + (dim << `GLB_LD_DMA_HEADER_0_DIM_DIM_F_LSB)); + // NOTE: Each stride/range address difference is 'hc + for (int i = 0; i < dim; i++) begin + glb_cfg_write( + (tile_id << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + `GLB_LD_DMA_HEADER_0_RANGE_0_R + i * 'hc, + (extent[i] << `GLB_LD_DMA_HEADER_0_RANGE_0_RANGE_F_LSB)); + glb_cfg_read( + (tile_id << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + `GLB_LD_DMA_HEADER_0_RANGE_0_R + i * 'hc, + (extent[i] << `GLB_LD_DMA_HEADER_0_RANGE_0_RANGE_F_LSB)); + glb_cfg_write( + (tile_id << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + `GLB_LD_DMA_HEADER_0_STRIDE_0_R + i * 'hc, + (data_stride[i] << `GLB_LD_DMA_HEADER_0_STRIDE_0_STRIDE_F_LSB)); + glb_cfg_read( + (tile_id << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + `GLB_LD_DMA_HEADER_0_STRIDE_0_R + i * 'hc, + (data_stride[i] << `GLB_LD_DMA_HEADER_0_STRIDE_0_STRIDE_F_LSB)); + glb_cfg_write( + (tile_id << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + `GLB_LD_DMA_HEADER_0_CYCLE_STRIDE_0_R + i * 'hc, + (cycle_stride[i] << `GLB_LD_DMA_HEADER_0_CYCLE_STRIDE_0_CYCLE_STRIDE_F_LSB)); + glb_cfg_read( + (tile_id << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + `GLB_LD_DMA_HEADER_0_CYCLE_STRIDE_0_R + i * 'hc, + (cycle_stride[i] << `GLB_LD_DMA_HEADER_0_CYCLE_STRIDE_0_CYCLE_STRIDE_F_LSB)); + end + endtask + + task automatic f2g_dma_configure(input int tile_id, bit on, [AXI_DATA_WIDTH-1:0] start_addr, + [AXI_DATA_WIDTH-1:0] cycle_start_addr, int dim, + int extent[LOOP_LEVEL], int cycle_stride[LOOP_LEVEL], + int data_stride[LOOP_LEVEL]); + glb_cfg_write((tile_id << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + `GLB_ST_DMA_CTRL_R, + ((2'b10 << `GLB_ST_DMA_CTRL_DATA_MUX_F_LSB) + | (on << `GLB_ST_DMA_CTRL_MODE_F_LSB) + | (1 << `GLB_ST_DMA_CTRL_USE_VALID_F_LSB))); + glb_cfg_read((tile_id << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + `GLB_ST_DMA_CTRL_R, + ((2'b10 << `GLB_ST_DMA_CTRL_DATA_MUX_F_LSB) + | (on << `GLB_ST_DMA_CTRL_MODE_F_LSB) + | (1 << `GLB_ST_DMA_CTRL_USE_VALID_F_LSB))); + glb_cfg_write( 
+ (tile_id << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + `GLB_ST_DMA_HEADER_0_START_ADDR_R, + (start_addr << `GLB_ST_DMA_HEADER_0_START_ADDR_START_ADDR_F_LSB)); + glb_cfg_read( + (tile_id << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + `GLB_ST_DMA_HEADER_0_START_ADDR_R, + (start_addr << `GLB_ST_DMA_HEADER_0_START_ADDR_START_ADDR_F_LSB)); + + glb_cfg_write( + (tile_id << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + `GLB_ST_DMA_HEADER_0_CYCLE_START_ADDR_R, + (cycle_start_addr << `GLB_ST_DMA_HEADER_0_CYCLE_START_ADDR_CYCLE_START_ADDR_F_LSB)); + glb_cfg_read( + (tile_id << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + `GLB_ST_DMA_HEADER_0_CYCLE_START_ADDR_R, + (cycle_start_addr << `GLB_ST_DMA_HEADER_0_CYCLE_START_ADDR_CYCLE_START_ADDR_F_LSB)); + + glb_cfg_write( + (tile_id << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + `GLB_ST_DMA_HEADER_0_DIM_R, + (dim << `GLB_ST_DMA_HEADER_0_DIM_DIM_F_LSB)); + glb_cfg_read( + (tile_id << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + `GLB_ST_DMA_HEADER_0_DIM_R, + (dim << `GLB_ST_DMA_HEADER_0_DIM_DIM_F_LSB)); + // NOTE: Each stride/range address difference is 'hc + for (int i = 0; i < dim; i++) begin + glb_cfg_write( + (tile_id << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + `GLB_ST_DMA_HEADER_0_RANGE_0_R + i * 'hc, + (extent[i] << `GLB_ST_DMA_HEADER_0_RANGE_0_RANGE_F_LSB)); + glb_cfg_read( + (tile_id << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + `GLB_ST_DMA_HEADER_0_RANGE_0_R + i * 'hc, + (extent[i] << `GLB_ST_DMA_HEADER_0_RANGE_0_RANGE_F_LSB)); + glb_cfg_write( + (tile_id << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + `GLB_ST_DMA_HEADER_0_STRIDE_0_R + i * 'hc, + (data_stride[i] << `GLB_ST_DMA_HEADER_0_STRIDE_0_STRIDE_F_LSB)); + glb_cfg_read( + (tile_id << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + `GLB_ST_DMA_HEADER_0_STRIDE_0_R + i * 'hc, + (data_stride[i] << `GLB_ST_DMA_HEADER_0_STRIDE_0_STRIDE_F_LSB)); + glb_cfg_write( + (tile_id << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + `GLB_ST_DMA_HEADER_0_CYCLE_STRIDE_0_R + i * 'hc, + (cycle_stride[i] << 
`GLB_ST_DMA_HEADER_0_CYCLE_STRIDE_0_CYCLE_STRIDE_F_LSB)); + glb_cfg_read( + (tile_id << (AXI_ADDR_WIDTH - TILE_SEL_ADDR_WIDTH)) + `GLB_ST_DMA_HEADER_0_CYCLE_STRIDE_0_R + i * 'hc, + (cycle_stride[i] << `GLB_ST_DMA_HEADER_0_CYCLE_STRIDE_0_CYCLE_STRIDE_F_LSB)); + end + endtask + + task glb_cfg_write(input [AXI_ADDR_WIDTH-1:0] addr, input [AXI_DATA_WIDTH-1:0] data); + repeat (5) @(posedge clk); + #2 if_cfg_wr_clk_en <= 1; + @(posedge clk); + #2 if_cfg_wr_en <= 1; + if_cfg_wr_addr <= addr; + if_cfg_wr_data <= data; + @(posedge clk); + #2 if_cfg_wr_en <= 0; + if_cfg_wr_addr <= 0; + if_cfg_wr_data <= 0; + repeat (NUM_GLB_TILES + 10) @(posedge clk); + #2 if_cfg_wr_clk_en <= 0; + repeat (2) @(posedge clk); + endtask + + task glb_cfg_read(input [AXI_ADDR_WIDTH-1:0] addr, input [AXI_DATA_WIDTH-1:0] data); + repeat (5) @(posedge clk); + #2 if_cfg_rd_clk_en <= 1; + @(posedge clk); + #2 if_cfg_rd_en <= 1; + if_cfg_rd_addr <= addr; + @(posedge clk); + #2 if_cfg_rd_en <= 0; + if_cfg_rd_addr <= 0; + fork : glb_cfg_read_timeout + begin + while (1) begin + if (if_cfg_rd_data_valid) begin + assert (data == if_cfg_rd_data); + break; + end + @(posedge clk); + end + end + begin + repeat (20 + 2 * NUM_GLB_TILES) @(posedge clk); + $display("@%0t: %m ERROR: glb cfg read timeout ", $time); + end + join_any + disable fork; + @(posedge clk); + #2 if_cfg_rd_clk_en <= 0; + repeat (2) @(posedge clk); + endtask + + task automatic proc_write_burst(input [GLB_ADDR_WIDTH-1:0] addr, data64 data); + int size = data.size(); + repeat (5) @(posedge clk); + $display("Start glb-mem burst write. 
addr: 0x%0h, size %0d", addr, size); + #2 + foreach (data[i]) begin + proc_write(addr + 8 * i, data[i]); + end + #2 proc_wr_en <= 0; + proc_wr_strb <= 0; + $display("Finish glb-mem burst write"); + repeat (5) @(posedge clk); + endtask + + task automatic proc_write(input [GLB_ADDR_WIDTH-1:0] addr, [BANK_DATA_WIDTH-1:0] data); + proc_wr_en <= 1; + proc_wr_strb <= {(BANK_DATA_WIDTH / 8) {1'b1}}; + proc_wr_addr <= addr; + proc_wr_data <= data; + @(posedge clk); + #2 proc_wr_en <= 0; + proc_wr_strb <= 0; + endtask + + task automatic proc_write_partial(input [GLB_ADDR_WIDTH-1:0] addr, + [CGRA_DATA_WIDTH-1:0] data); + bit [BANK_DATA_WIDTH / 8 - 1:0] strb; + bit [BANK_DATA_WIDTH - 1:0] wr_data; + + // FIXME: Lazy to generalize this task. + if (BANK_DATA_WIDTH != CGRA_DATA_WIDTH * 4) + $error("This task assumes that BANK_DATA_WIDTH is 64bit and CGRA_DATA_WIDTH is 16bit."); + + case (addr[2:1]) + 2'b00: begin + strb = {{6{1'b0}}, {2{1'b1}}}; + wr_data = {48'b0, data}; + end + 2'b01: begin + strb = {{4{1'b0}}, {2{1'b1}}, {2{1'b0}}}; + wr_data = {32'b0, data, 16'b0}; + end + 2'b10: begin + strb = {{2{1'b0}}, {2{1'b1}}, {4{1'b0}}}; + wr_data = {16'b0, data, 32'b0}; + end + 2'b11: begin + strb = {{2{1'b1}}, {6{1'b0}}}; + wr_data = {data, 48'b0}; + end + endcase + proc_wr_en <= 1; + proc_wr_strb <= strb; + proc_wr_addr <= addr; + proc_wr_data <= wr_data; + @(posedge clk); + #2 proc_wr_en <= 0; + proc_wr_strb <= 0; + endtask + + task automatic proc_read_burst(input [GLB_ADDR_WIDTH-1:0] addr, ref data64 data); + logic [CGRA_DATA_WIDTH-1:0] data_q[$]; + data16 data_out; + int size = data.size(); + + repeat (5) @(posedge clk); + $display("Start glb-mem burst read. 
addr: 0x%0h, size %0d", addr, size); + // If address is not aligned, we need to read one more address + if (addr[2:1] != 2'b00) begin + size += 1; + end + fork : proc_read + begin + for (int i = 0; i < size; i++) begin + #2 proc_rd_en <= 1; + proc_rd_addr <= addr + 8 * i; + @(posedge clk); + end + #2 proc_rd_en <= 0; + end + begin + fork : proc_read_timeout + begin + int cnt = 0; + while (1) begin + @(posedge clk); + if (proc_rd_data_valid) begin + // For the first and the last data, we only push valid data to queue + if (cnt == 0) begin + if (addr[2:1] == 2'b00) begin + for (int i = 0; i < 4; i++) begin + data_q.push_back( + proc_rd_data[CGRA_DATA_WIDTH*i+:CGRA_DATA_WIDTH]); + end + end else if (addr[2:1] == 2'b01) begin + for (int i = 1; i < 4; i++) begin + data_q.push_back( + proc_rd_data[CGRA_DATA_WIDTH*i+:CGRA_DATA_WIDTH]); + end + end else if (addr[2:1] == 2'b10) begin + for (int i = 2; i < 4; i++) begin + data_q.push_back( + proc_rd_data[CGRA_DATA_WIDTH*i+:CGRA_DATA_WIDTH]); + end + end else begin + for (int i = 3; i < 4; i++) begin + data_q.push_back( + proc_rd_data[CGRA_DATA_WIDTH*i+:CGRA_DATA_WIDTH]); + end + end + end else if (cnt == (size - 1)) begin + if (addr[2:1] == 2'b00) begin + for (int i = 0; i < 4; i++) begin + data_q.push_back( + proc_rd_data[CGRA_DATA_WIDTH*i+:CGRA_DATA_WIDTH]); + end + end else if (addr[2:1] == 2'b01) begin + for (int i = 0; i < 1; i++) begin + data_q.push_back( + proc_rd_data[CGRA_DATA_WIDTH*i+:CGRA_DATA_WIDTH]); + end + end else if (addr[2:1] == 2'b10) begin + for (int i = 0; i < 2; i++) begin + data_q.push_back( + proc_rd_data[CGRA_DATA_WIDTH*i+:CGRA_DATA_WIDTH]); + end + end else begin + for (int i = 0; i < 3; i++) begin + data_q.push_back( + proc_rd_data[CGRA_DATA_WIDTH*i+:CGRA_DATA_WIDTH]); + end + end + end else begin + for (int i = 0; i < 4; i++) begin + data_q.push_back( + proc_rd_data[CGRA_DATA_WIDTH*i+:CGRA_DATA_WIDTH]); + end + end + cnt = cnt + 1; + if (cnt == size) break; + end + end + end + begin + repeat (2 * 
size + 100) @(posedge clk); + $display("@%0t: %m ERROR: glb-mem burst read timeout ", $time); + end + join_any + disable fork; + end + join + data_out = data_q; + data = convert_16b_to_64b(data_out); + repeat (5) @(posedge clk); + $display("Finish glb-mem burst read"); + endtask + + task automatic g2f_start(input [NUM_GLB_TILES-1:0] tile_id_mask); + $display("g2f streaming start. tiles: 0x%4h", tile_id_mask); + + // Enable glb2prr + for (int i = 0; i < NUM_PRR; i++) begin + if (tile_id_mask[i] == 1) begin + void'($root.top.cgra.glb2prr_on(i)); + end + end + + #2 strm_g2f_start_pulse <= tile_id_mask; + @(posedge clk); + #2 strm_g2f_start_pulse <= 0; + endtask + + task automatic g2f_run(input int tile_id, int total_cycle); + $display("g2f run total cycles: %0d", total_cycle); + fork : interrupt_timeout + begin + wait (strm_g2f_interrupt_pulse[tile_id]); + $display("g2f streaming done."); + end + begin + repeat (total_cycle + 30) @(posedge clk); + $display("@%0t: %m ERROR: glb stream g2f interrupt timeout ", $time); + end + join_any + disable fork; + @(posedge clk); + endtask + + task automatic f2g_start(input [NUM_GLB_TILES-1:0] tile_id_mask); + $display("f2g streaming start. 
tiles: 0x%4h", tile_id_mask); + + #2 strm_f2g_start_pulse <= tile_id_mask; + @(posedge clk); + #2 strm_f2g_start_pulse <= 0; + + repeat (GLS_PIPELINE_DEPTH + 3) @(posedge clk); + // Enable prr2glb (calls prr2glb_on) for every index i < NUM_PRR whose bit is set in tile_id_mask + for (int i = 0; i < NUM_PRR; i++) begin + if (tile_id_mask[i] == 1) begin + void'($root.top.cgra.prr2glb_on(i)); + end + end + + endtask + + // f2g_run: block until strm_f2g_interrupt_pulse[tile_id] is asserted, or print a timeout + // message after total_cycle + 30 clock edges; the branch still pending is killed by disable fork. + task automatic f2g_run(input int tile_id, int total_cycle); + $display("f2g run total cycles: %0d", total_cycle); + fork : interrupt_timeout + begin + wait (strm_f2g_interrupt_pulse[tile_id]); + $display("f2g streaming done."); + end + begin + repeat (total_cycle + 30) @(posedge clk); + $display("@%0t: %m ERROR: cgra stream f2g interrupt timeout ", $time); + end + join_any + disable fork; + @(posedge clk); + endtask + + // pcfg_start: after 5 clock edges, drive pcfg_start_pulse with tile_id_mask until the next + // clock edge, then clear it. + task automatic pcfg_start(input [NUM_GLB_TILES-1:0] tile_id_mask); + $display("pcfg streaming start. tiles: 0x%4h", tile_id_mask); + repeat (5) @(posedge clk); + #2 pcfg_start_pulse <= tile_id_mask; + @(posedge clk); + #2 pcfg_start_pulse <= 0; + endtask + + // pcfg_run: wait for pcfg_g2f_interrupt_pulse[tile_id], or print a timeout message after + // total_cycle + 50 clock edges. A third branch samples {addr, data} into cgra_cfg_out on each + // clock where the write enable of check_tile_id is high. + // NOTE(review): cgra_cfg_out is indexed by cnt with no bounds check here - presumably the + // caller pre-sizes it; confirm. + task automatic pcfg_run(input int tile_id, int check_tile_id, int total_cycle, + ref [CGRA_CFG_ADDR_WIDTH + CGRA_CFG_DATA_WIDTH - 1:0] cgra_cfg_out[]); + int cnt = 0; + fork : interrupt_timeout + begin + wait (pcfg_g2f_interrupt_pulse[tile_id]); + end + begin + repeat (total_cycle + 50) @(posedge clk); + $display("@%0t: %m ERROR: glb stream pcfg interrupt timeout ", $time); + end + begin + forever begin + // Check column index CGRA_PER_GLB-1 of check_tile_id (the second column when CGRA_PER_GLB is 2) + if (cgra_cfg_g2f_cfg_wr_en[check_tile_id][CGRA_PER_GLB-1] == 1) begin + cgra_cfg_out[ + cnt++ + ] = { + cgra_cfg_g2f_cfg_addr[check_tile_id][CGRA_PER_GLB-1], + cgra_cfg_g2f_cfg_data[check_tile_id][CGRA_PER_GLB-1] + }; + end + @(posedge clk); + end + end + join_any + disable fork; + @(posedge clk); + $display("pcfg streaming done."); + endtask + + function automatic void read_prr(input int prr_id, + ref [CGRA_DATA_WIDTH-1:0] cgra_data_arr_out[]); + if (cgra_data_arr_out.size() != $root.top.cgra.glb2prr_q[prr_id].size) begin + $error(
+ "@%0t: %m FAIL: glb stream to PRR data size is different.\nExpected data size: %d, PRR data size: %d", + $time, cgra_data_arr_out.size(), $root.top.cgra.glb2prr_q[prr_id].size); + end else begin + foreach ($root.top.cgra.glb2prr_q[prr_id][i]) begin + cgra_data_arr_out[i] = $root.top.cgra.glb2prr_q[prr_id][i]; + end + end + endfunction + + function automatic void write_prr(input int prr_id, + ref [CGRA_DATA_WIDTH-1:0] cgra_data_arr[]); + foreach (cgra_data_arr[i]) begin + $root.top.cgra.prr2glb_q[prr_id][i] = cgra_data_arr[i]; + end + endfunction + + function automatic bit [NUM_GLB_TILES-1:0] update_tile_mask( + int tile_id, [NUM_GLB_TILES-1:0] tile_id_mask); + bit [NUM_GLB_TILES-1:0] new_tile_id_mask; + new_tile_id_mask = tile_id_mask | (1 << tile_id); + return new_tile_id_mask; + endfunction + + function void assert_(bit cond, string msg); + assert (cond) + else begin + $display("%s", msg); + $stacktrace; + $finish(1); + end + endfunction + + function automatic int compare_64b_arr(ref [63:0] data_arr_0[], ref [63:0] data_arr_1[]); + int size_0 = data_arr_0.size(); + int size_1 = data_arr_1.size(); + int err; + if (size_0 != size_1) begin + $display("Data array size is different. data_arr_0: %0d, data_arr_1: %0d", size_0, + size_1); + err++; + end + foreach (data_arr_0[i]) begin + if (data_arr_0[i] !== data_arr_1[i]) begin + err++; + if (err > MAX_NUM_ERRORS) begin + $display("The number of errors reached %0d. Do not print anymore", + MAX_NUM_ERRORS); + break; + end + $display("Data different. 
index: %0d, data_arr_0: 0x%0h, data_arr_1: 0x%0h", i, + data_arr_0[i], data_arr_1[i]); + end + end + if (err > 0) begin + $error("Two data array are Different"); + return 1; + end + $display("Two data array are same"); + return 0; + endfunction + + function automatic int compare_16b_arr(ref [15:0] data_arr_0[], ref [15:0] data_arr_1[]); + int size_0 = data_arr_0.size(); + int size_1 = data_arr_1.size(); + int err; + if (size_0 != size_1) begin + $display("Data array size is different. data_arr_0: %0d, data_arr_1: %0d", size_0, + size_1); + err++; + end + foreach (data_arr_0[i]) begin + if (data_arr_0[i] !== data_arr_1[i]) begin + err++; + if (err > MAX_NUM_ERRORS) begin + $display("The number of errors reached %0d. Do not print anymore", + MAX_NUM_ERRORS); + break; + end + $display("Data different. index: %0d, data_arr_0: 0x%0h, data_arr_1: 0x%0h", i, + data_arr_0[i], data_arr_1[i]); + end + end + if (err > 0) begin + $error("Two data array are Different"); + return 1; + end + $display("Two data array are same"); + return 0; + endfunction + + function int compare_cfg(logic [BANK_DATA_WIDTH-1:0] cfg_0, + logic [AXI_DATA_WIDTH-1:0] cfg_1); + if (cfg_0 !== cfg_1) begin + $display("cfg data is different. 
cfg_0: 0x%0h, cfg_1: 0x%0h", cfg_0, cfg_1); + return 1; + end + return 0; + endfunction + + /* convert_16b_to_64b: pack a dynamic array of 16-bit words into 64-bit words, four per output word, with data_in[i] in the least-significant lane. The output has ceil(size/4) entries. When the final group holds fewer than four words, its unused upper lanes are filled with 1s. */ + /* Fix: the padded value used to be overwritten by an unconditional 4-word concatenation that also indexed past the end of data_in, and the old guard (i == num_data_in - 1) could only match a 1-word tail. */ + function automatic data64 convert_16b_to_64b(ref [15:0] data_in[]); + data64 data_out; + int num_data_in = data_in.size(); + int num_data_out = (num_data_in + 3) / 4; + int remaining; + data_out = new[num_data_out]; + for (int i = 0; i < num_data_in; i = i + 4) begin + /* number of input words available from index i onward; below 4 only for the last group */ + remaining = num_data_in - i; + if (remaining >= 4) begin + data_out[i/4] = {data_in[i+3], data_in[i+2], data_in[i+1], data_in[i]}; + end else if (remaining == 3) begin + data_out[i/4] = {{16{1'b1}}, data_in[i+2], data_in[i+1], data_in[i]}; + end else if (remaining == 2) begin + data_out[i/4] = {{32{1'b1}}, data_in[i+1], data_in[i]}; + end else begin + data_out[i/4] = {{48{1'b1}}, data_in[i]}; + end + end + return data_out; + endfunction + + /* convert_64b_to_16b: split each 64-bit input word into four 16-bit words, least-significant lane first, so the output has 4 * data_in.size() entries. */ + function automatic data16 convert_64b_to_16b(ref [63:0] data_in[]); + data16 data_out; + int num_data_in = data_in.size(); + int num_data_out = num_data_in * 4; + data_out = new[num_data_out]; + for (int i = 0; i < num_data_in; i++) begin + data_out[i*4+0] = data_in[i][0+:16]; + data_out[i*4+1] = data_in[i][16+:16]; + data_out[i*4+2] = data_in[i][32+:16]; + data_out[i*4+3] = data_in[i][48+:16]; + end + return data_out; + endfunction + + function automatic int read_cgra_cfg( + ref [CGRA_CFG_ADDR_WIDTH+CGRA_CFG_DATA_WIDTH-1:0] bs_arr[]); + bit [CGRA_CFG_ADDR_WIDTH-1:0] bs_addr; + bit [CGRA_CFG_DATA_WIDTH-1:0] bs_data; + bit [CGRA_CFG_DATA_WIDTH-1:0] rd_data; + int err; + assert_($root.top.cgra.cfg_wr_en == 0, $sformatf( + "Configuration wr_en should go 0 after configuration")); + for (int i = 0; i < bs_arr.size(); i++) begin + bs_addr = bs_arr[i][CGRA_CFG_DATA_WIDTH+:CGRA_CFG_ADDR_WIDTH]; + bs_data = bs_arr[i][0+:CGRA_CFG_DATA_WIDTH]; + rd_data = $root.top.cgra.cfg_read(bs_addr); + if (rd_data != bs_data) begin + $display("bitstream addr :0x%8h is different.
Gold: 0x%8h, Read: 0x%8h", bs_addr, + bs_data, rd_data); + err++; + end + end + if (err > 0) begin + return 1; + end + $display("Bitstream are same"); + return 0; + endfunction + +endprogram diff --git a/global_buffer/sim_amber/kernel.sv b/global_buffer/sim_amber/kernel.sv new file mode 100644 index 0000000000..0c580a13a9 --- /dev/null +++ b/global_buffer/sim_amber/kernel.sv @@ -0,0 +1,198 @@ +typedef enum int { + WR = 0, + RD = 1, + G2F = 2, + F2G = 3, + PCFG = 4, + SRAM = 5 +} stream_type; + +typedef logic [CGRA_DATA_WIDTH-1:0] data16[]; +typedef logic [BANK_DATA_WIDTH-1:0] data64[]; + +class Kernel; + static int cnt = 0; + stream_type type_; + int tile_id; + int bank_id; + int start_addr; + int cycle_start_addr; + int check_tile_id; + int dim; + int extent[LOOP_LEVEL]; + int cycle_stride[LOOP_LEVEL]; + int data_stride[LOOP_LEVEL]; + int new_extent[LOOP_LEVEL]; + int new_cycle_stride[LOOP_LEVEL]; + int new_data_stride[LOOP_LEVEL]; + string filename; + data16 mem; + data16 data_arr; + data16 data_arr_out; + int total_cycle; + data64 data64_arr; + data64 data64_arr_out; +endclass + +class Test; + const int tile_offset = 1 << (BANK_ADDR_WIDTH + BANK_SEL_ADDR_WIDTH); + const int bank_offset = 1 << BANK_ADDR_WIDTH; + string filename; + int num_kernels; + int data_network_mask; + Kernel kernels[]; + bit [NUM_GLB_TILES-1:0] g2f_tile_mask; + bit [NUM_GLB_TILES-1:0] f2g_tile_mask; + bit [NUM_GLB_TILES-1:0] pcfg_tile_mask; + bit [NUM_GLB_TILES-1:0] pcfg_broadcast_stall_mask; + bit [`GLB_PCFG_BROADCAST_MUX_R_MSB:0] pcfg_broadcast_mux_value[NUM_GLB_TILES-1:0]; + + extern function new(string filename); +endclass + +function Test::new(string filename); + int fd = $fopen(filename, "r"); + string type_, data_filename; + int tile_id, bank_id, dim; + int tmp_start_addr, tmp_cycle_start_addr; + string cycle_stride_s, extent_s, data_stride_s, tmp_s; + string new_cycle_stride_s, new_extent_s, new_data_stride_s, new_tmp_s; + string line; + int start_tile, end_tile, tmp_tile; + + 
$display("\n---- Test Initialization ----"); + if (fd) $display("Test file open %s", filename); + else $error("Cannot open %s", filename); + // Skip the first line + void'($fgets(line, fd)); + $display("[Description] %s", line); + void'($fscanf(fd, " %d", num_kernels)); + void'($fscanf(fd, " %b", data_network_mask)); + kernels = new[num_kernels]; + for (int i = 0; i < num_kernels; i++) begin + kernels[i] = new(); + void'($fscanf( + fd, " %s%d%d%d%d", type_, tile_id, bank_id, tmp_start_addr, tmp_cycle_start_addr + )); + if (type_ == "WR") kernels[i].type_ = WR; + else if (type_ == "RD") kernels[i].type_ = RD; + else if (type_ == "G2F") kernels[i].type_ = G2F; + else if (type_ == "F2G") kernels[i].type_ = F2G; + else if (type_ == "PCFG") kernels[i].type_ = PCFG; + else if (type_ == "SRAM") kernels[i].type_ = SRAM; + else $error("This type [%s] is not supported", type_); + void'($fscanf(fd, " %d", dim)); + kernels[i].tile_id = tile_id; + kernels[i].bank_id = bank_id; + kernels[i].start_addr = bank_offset * bank_id + tmp_start_addr; + kernels[i].cycle_start_addr = tmp_cycle_start_addr; + kernels[i].check_tile_id = tmp_cycle_start_addr; + kernels[i].dim = dim; + for (int j = 0; j < dim; j++) begin + void'($fscanf(fd, " %d", kernels[i].extent[j])); + end + for (int j = 0; j < dim; j++) begin + void'($fscanf(fd, " %d", kernels[i].cycle_stride[j])); + end + for (int j = 0; j < dim; j++) begin + void'($fscanf(fd, " %d", kernels[i].data_stride[j])); + end + void'($fscanf(fd, " %s", data_filename)); + kernels[i].filename = data_filename; + + // Make extent/stride hardware-friendly + for (int j = 0; j < dim; j++) begin + kernels[i].new_extent[j] = kernels[i].extent[j] - 2; + kernels[i].new_cycle_stride[j] = kernels[i].cycle_stride[j]; + kernels[i].new_data_stride[j] = kernels[i].data_stride[j]; + for (int k = 0; k < j; k++) begin + kernels[i].new_cycle_stride[j] -= kernels[i].cycle_stride[k] * (kernels[i].extent[k] - 1); + kernels[i].new_data_stride[j] -= 
kernels[i].data_stride[k] * (kernels[i].extent[k] - 1); + end + kernels[i].new_data_stride[j] = kernels[i].new_data_stride[j] << CGRA_BYTE_OFFSET; + end + end + $fclose(fd); + + for (int i = 0; i < num_kernels; i++) begin + if (kernels[i].type_ == G2F) g2f_tile_mask[kernels[i].tile_id] = 1; + else if (kernels[i].type_ == F2G) f2g_tile_mask[kernels[i].tile_id] = 1; + else if (kernels[i].type_ == PCFG) pcfg_tile_mask[kernels[i].tile_id] = 1; + end + + // Calculate total cycles + for (int i = 0; i < num_kernels; i++) begin + if (kernels[i].type_ == G2F || kernels[i].type_ == F2G) begin + kernels[i].total_cycle = kernels[i].cycle_stride[kernels[i].dim - 1] * kernels[i].extent[kernels[i].dim - 1] + kernels[i].cycle_start_addr; + end else if (kernels[i].type_ == PCFG) begin + kernels[i].total_cycle = kernels[i].extent[0] / 4; + end + end + + // Calculate stall + pcfg_broadcast_stall_mask = {NUM_GLB_TILES{1'b1}}; + for (int i = 0; i < num_kernels; i++) begin + // For PCFG test, unstall pcfg logic + if (kernels[i].type_ == PCFG) begin + pcfg_broadcast_stall_mask = 0; + break; + end + end + + for (int i = 0; i < NUM_GLB_TILES; i++) begin + pcfg_broadcast_mux_value[i] = 0; + end + for (int i = 0; i < num_kernels; i++) begin + if (kernels[i].type_ == PCFG) begin + start_tile = kernels[i].tile_id; + end_tile = kernels[i].check_tile_id; + if (start_tile <= end_tile) begin + pcfg_broadcast_mux_value[start_tile] = (1 << `GLB_PCFG_BROADCAST_MUX_SOUTH_F_LSB) | (1 << `GLB_PCFG_BROADCAST_MUX_EAST_F_LSB) | (0 << `GLB_PCFG_BROADCAST_MUX_WEST_F_LSB); + for (int j = start_tile + 1; j <= end_tile; j++) begin + pcfg_broadcast_mux_value[j] = (2 << `GLB_PCFG_BROADCAST_MUX_SOUTH_F_LSB) | (2 << `GLB_PCFG_BROADCAST_MUX_EAST_F_LSB) | (0 << `GLB_PCFG_BROADCAST_MUX_WEST_F_LSB); + end + end else begin + pcfg_broadcast_mux_value[start_tile] = (1 << `GLB_PCFG_BROADCAST_MUX_SOUTH_F_LSB) | (0 << `GLB_PCFG_BROADCAST_MUX_EAST_F_LSB) | (1 << `GLB_PCFG_BROADCAST_MUX_WEST_F_LSB); + for (int j = start_tile - 
1; j >= end_tile; j--) begin + pcfg_broadcast_mux_value[j] = (3 << `GLB_PCFG_BROADCAST_MUX_SOUTH_F_LSB) | (0 << `GLB_PCFG_BROADCAST_MUX_EAST_F_LSB) | (2 << `GLB_PCFG_BROADCAST_MUX_WEST_F_LSB); + end + end + end + end + + // Display log + $display("Number of kernels in the app is %0d", num_kernels); + $display("Data interconnect of app is %16b", data_network_mask); + foreach (kernels[i]) begin + extent_s = ""; + cycle_stride_s = ""; + data_stride_s = ""; + new_extent_s = ""; + new_cycle_stride_s = ""; + new_data_stride_s = ""; + $display( + "Kernel %0d: Type: %s, Tile_ID: %0d, Bank_ID: %0d, Start_addr: %0d, Cycle_start_addr: %0d", + i, kernels[i].type_.name(), kernels[i].tile_id, kernels[i].bank_id, + kernels[i].start_addr, kernels[i].cycle_start_addr); + for (int j = 0; j < kernels[i].dim; j++) begin + tmp_s.itoa(kernels[i].extent[j]); + new_tmp_s.itoa(kernels[i].new_extent[j]); + extent_s = {extent_s, " ", tmp_s}; + new_extent_s = {new_extent_s, " ", new_tmp_s}; + + tmp_s.itoa(kernels[i].cycle_stride[j]); + new_tmp_s.itoa(kernels[i].new_cycle_stride[j]); + cycle_stride_s = {cycle_stride_s, " ", tmp_s}; + new_cycle_stride_s = {new_cycle_stride_s, " ", new_tmp_s}; + + tmp_s.itoa(kernels[i].data_stride[j]); + new_tmp_s.itoa(kernels[i].new_data_stride[j]); + data_stride_s = {data_stride_s, " ", tmp_s}; + new_data_stride_s = {new_data_stride_s, " ", new_tmp_s}; + end + $display("[BEFORE] Extent: [%s], cycle_stride: [%s], data_stride: [%s]", extent_s, + cycle_stride_s, data_stride_s); + $display("[AFTER] Extent: [%s], cycle_stride: [%s], data_stride: [%s]", new_extent_s, + new_cycle_stride_s, new_data_stride_s); + end +endfunction diff --git a/global_buffer/sim_amber/tb_global_buffer.f b/global_buffer/sim_amber/tb_global_buffer.f new file mode 100644 index 0000000000..3a5b58ed44 --- /dev/null +++ b/global_buffer/sim_amber/tb_global_buffer.f @@ -0,0 +1,4 @@ +top.sv +kernel.sv +glb_test.sv +cgra.sv diff --git a/global_buffer/sim_amber/top.sv 
b/global_buffer/sim_amber/top.sv new file mode 100644 index 0000000000..5f493d038b --- /dev/null +++ b/global_buffer/sim_amber/top.sv @@ -0,0 +1,262 @@ +/*============================================================================= +** Module: top.sv +** Description: +** simple top testbench for glb +** Author: Taeyoung Kong +** Change history: 05/22/2021 - Implement first version of testbench +**===========================================================================*/ +`define TIMEUNIT 100ps +`define TIMEPRECISION 1ps +`define CLK_PERIOD 15 +`define CLK_SRC_LATENCY -5 + +import global_buffer_param::*; + +module top; + timeunit `TIMEUNIT; + timeprecision `TIMEPRECISION; + +`ifdef PWR + supply1 VDD; + supply0 VSS; +`endif + + // --------------------------------------- + // GLB signals + // --------------------------------------- + logic clk; + logic dut_clk; + logic [NUM_GLB_TILES-1:0] pcfg_broadcast_stall; + logic [NUM_GLB_TILES-1:0] glb_clk_en_master; + logic [NUM_GLB_TILES-1:0] glb_clk_en_bank_master; + logic [NUM_GROUPS-1:0][$clog2(NUM_GLB_TILES)-1:0] flush_crossbar_sel; + logic reset; + logic cgra_soft_reset; + + // cgra configuration from global controller + logic cgra_cfg_jtag_gc2glb_wr_en; + logic cgra_cfg_jtag_gc2glb_rd_en; + logic [CGRA_CFG_ADDR_WIDTH-1:0] cgra_cfg_jtag_gc2glb_addr; + logic [CGRA_CFG_DATA_WIDTH-1:0] cgra_cfg_jtag_gc2glb_data; + + // control pulse + logic [NUM_GLB_TILES-1:0] strm_g2f_start_pulse; + logic [NUM_GLB_TILES-1:0] strm_f2g_start_pulse; + logic [NUM_GLB_TILES-1:0] pcfg_start_pulse; + logic [NUM_GLB_TILES-1:0] strm_f2g_interrupt_pulse; + logic [NUM_GLB_TILES-1:0] strm_g2f_interrupt_pulse; + logic [NUM_GLB_TILES-1:0] pcfg_g2f_interrupt_pulse; + + // Processor + logic proc_wr_en; + logic [BANK_DATA_WIDTH/8-1:0] proc_wr_strb; + logic [GLB_ADDR_WIDTH-1:0] proc_wr_addr; + logic [BANK_DATA_WIDTH-1:0] proc_wr_data; + logic proc_rd_en; + logic [GLB_ADDR_WIDTH-1:0] proc_rd_addr; + logic [BANK_DATA_WIDTH-1:0] proc_rd_data; + logic 
proc_rd_data_valid; + + // configuration of glb from glc + logic if_cfg_wr_en; + logic if_cfg_wr_clk_en; + logic [AXI_ADDR_WIDTH-1:0] if_cfg_wr_addr; + logic [AXI_DATA_WIDTH-1:0] if_cfg_wr_data; + logic if_cfg_rd_en; + logic if_cfg_rd_clk_en; + logic [AXI_ADDR_WIDTH-1:0] if_cfg_rd_addr; + logic [AXI_DATA_WIDTH-1:0] if_cfg_rd_data; + logic if_cfg_rd_data_valid; + + // configuration of sram from glc + logic if_sram_cfg_wr_en; + logic [GLB_ADDR_WIDTH-1:0] if_sram_cfg_wr_addr; + logic [AXI_DATA_WIDTH-1:0] if_sram_cfg_wr_data; + logic if_sram_cfg_rd_en; + logic [GLB_ADDR_WIDTH-1:0] if_sram_cfg_rd_addr; + logic [AXI_DATA_WIDTH-1:0] if_sram_cfg_rd_data; + logic if_sram_cfg_rd_data_valid; + + // BOTTOM + // stall + + // cgra to glb streaming word + logic [NUM_GLB_TILES-1:0][CGRA_PER_GLB-1:0][CGRA_DATA_WIDTH-1:0] strm_data_f2g; + logic [NUM_GLB_TILES-1:0][CGRA_PER_GLB-1:0] strm_data_valid_f2g; + + // glb to cgra streaming word + logic [NUM_GLB_TILES-1:0][CGRA_PER_GLB-1:0][CGRA_DATA_WIDTH-1:0] strm_data_g2f; + logic [NUM_GLB_TILES-1:0][CGRA_PER_GLB-1:0] strm_data_valid_g2f; + logic [NUM_GROUPS-1:0] strm_data_flush_g2f; + + // cgra configuration to cgra + logic [NUM_GLB_TILES-1:0][CGRA_PER_GLB-1:0] cgra_cfg_g2f_cfg_wr_en; + logic [NUM_GLB_TILES-1:0][CGRA_PER_GLB-1:0] cgra_cfg_g2f_cfg_rd_en; + logic [NUM_GLB_TILES-1:0][CGRA_PER_GLB-1:0][CGRA_CFG_ADDR_WIDTH-1:0] cgra_cfg_g2f_cfg_addr; + logic [NUM_GLB_TILES-1:0][CGRA_PER_GLB-1:0][CGRA_CFG_DATA_WIDTH-1:0] cgra_cfg_g2f_cfg_data; + logic [NUM_GLB_TILES-1:0][CGRA_CFG_DATA_WIDTH-1:0] cgra_cfg_f2g_cfg_data; + + // --------------------------------------- + // CGRA signals + // --------------------------------------- + logic [NUM_PRR-1:0] g2c_cfg_wr_en; + logic [NUM_PRR-1:0][CGRA_CFG_ADDR_WIDTH-1:0] g2c_cfg_wr_addr; + logic [NUM_PRR-1:0][CGRA_CFG_DATA_WIDTH-1:0] g2c_cfg_wr_data; + logic [NUM_PRR-1:0] g2c_cfg_rd_en; + logic [NUM_PRR-1:0][CGRA_CFG_ADDR_WIDTH-1:0] g2c_cfg_rd_addr; + logic [NUM_PRR-1:0][CGRA_CFG_DATA_WIDTH-1:0] 
c2g_cfg_rd_data; + + logic [NUM_PRR-1:0] g2c_io1; + logic [NUM_PRR-1:0][15:0] g2c_io16; + logic [NUM_PRR-1:0] c2g_io1; + logic [NUM_PRR-1:0][15:0] c2g_io16; + + // max cycle set + initial begin + repeat (10000000) @(posedge clk); + $display("\n%0t\tERROR: The 10000000 cycles marker has passed!", $time); + $finish(2); + end + + // reset generation + task automatic assert_reset(); + reset <= 1; + repeat (10) @(posedge clk); + reset <= 0; + repeat (10) @(posedge clk); + endtask + + initial begin + clk = 0; + dut_clk = 0; + assert_reset(); + end + + always #(`CLK_PERIOD / 2.0) clk = !clk; + always @ (*) dut_clk <= #(`CLK_PERIOD + `CLK_SRC_LATENCY) clk; + + // instantiate test + glb_test test ( + .clk (clk), + .reset (reset), + // proc ifc + .proc_wr_en (proc_wr_en), + .proc_wr_strb (proc_wr_strb), + .proc_wr_addr (proc_wr_addr), + .proc_wr_data (proc_wr_data), + .proc_rd_en (proc_rd_en), + .proc_rd_addr (proc_rd_addr), + .proc_rd_data (proc_rd_data), + .proc_rd_data_valid (proc_rd_data_valid), + // config ifc + .if_cfg_wr_en (if_cfg_wr_en), + .if_cfg_wr_clk_en (if_cfg_wr_clk_en), + .if_cfg_wr_addr (if_cfg_wr_addr), + .if_cfg_wr_data (if_cfg_wr_data), + .if_cfg_rd_en (if_cfg_rd_en), + .if_cfg_rd_clk_en (if_cfg_rd_clk_en), + .if_cfg_rd_addr (if_cfg_rd_addr), + .if_cfg_rd_data (if_cfg_rd_data), + .if_cfg_rd_data_valid (if_cfg_rd_data_valid), + // sram config ifc + .if_sram_cfg_wr_en (if_sram_cfg_wr_en), + .if_sram_cfg_wr_addr (if_sram_cfg_wr_addr), + .if_sram_cfg_wr_data (if_sram_cfg_wr_data), + .if_sram_cfg_rd_en (if_sram_cfg_rd_en), + .if_sram_cfg_rd_addr (if_sram_cfg_rd_addr), + .if_sram_cfg_rd_data (if_sram_cfg_rd_data), + .if_sram_cfg_rd_data_valid(if_sram_cfg_rd_data_valid), + .* + ); + + // instantiate dut + global_buffer dut ( + .clk (dut_clk), + // proc ifc + .proc_wr_en (proc_wr_en), + .proc_wr_strb (proc_wr_strb), + .proc_wr_addr (proc_wr_addr), + .proc_wr_data (proc_wr_data), + .proc_rd_en (proc_rd_en), + .proc_rd_addr (proc_rd_addr), + .proc_rd_data 
(proc_rd_data), + .proc_rd_data_valid (proc_rd_data_valid), + // config ifc + .if_cfg_wr_en (if_cfg_wr_en), + .if_cfg_wr_clk_en (if_cfg_wr_clk_en), + .if_cfg_wr_addr (if_cfg_wr_addr), + .if_cfg_wr_data (if_cfg_wr_data), + .if_cfg_rd_en (if_cfg_rd_en), + .if_cfg_rd_clk_en (if_cfg_rd_clk_en), + .if_cfg_rd_addr (if_cfg_rd_addr), + .if_cfg_rd_data (if_cfg_rd_data), + .if_cfg_rd_data_valid (if_cfg_rd_data_valid), + // sram config ifc + .if_sram_cfg_wr_en (if_sram_cfg_wr_en), + .if_sram_cfg_wr_addr (if_sram_cfg_wr_addr), + .if_sram_cfg_wr_data (if_sram_cfg_wr_data), + .if_sram_cfg_rd_en (if_sram_cfg_rd_en), + .if_sram_cfg_rd_addr (if_sram_cfg_rd_addr), + .if_sram_cfg_rd_data (if_sram_cfg_rd_data), + .if_sram_cfg_rd_data_valid(if_sram_cfg_rd_data_valid), + + // cgra-glb + .strm_data_valid_f2g (strm_data_valid_f2g), + .strm_data_f2g (strm_data_f2g), + +`ifdef PWR + .VDD (VDD), + .VSS (VSS), +`endif + .* + ); + + cgra cgra ( + // stall + .stall ( {NUM_PRR{1'b0}} ), + // configuration + .cfg_wr_en (g2c_cfg_wr_en), + .cfg_wr_addr(g2c_cfg_wr_addr), + .cfg_wr_data(g2c_cfg_wr_data), + .cfg_rd_en (g2c_cfg_rd_en), + .cfg_rd_addr(g2c_cfg_rd_addr), + .cfg_rd_data(c2g_cfg_rd_data), + // data + .io1_g2io (g2c_io1), + .io16_g2io (g2c_io16), + .io1_io2g (c2g_io1), + .io16_io2g (c2g_io16), + .* + ); + + // Configuration interface + // TODO: Assume that NUM_PRR == NUM_GLB_TILES. Use the first one among two signals. + always_comb begin + for (int i = 0; i < NUM_PRR; i++) begin + g2c_cfg_wr_en[i] = cgra_cfg_g2f_cfg_wr_en[i][0]; + g2c_cfg_wr_addr[i] = cgra_cfg_g2f_cfg_addr[i][0]; + g2c_cfg_wr_data[i] = cgra_cfg_g2f_cfg_data[i][0]; + g2c_cfg_rd_en[i] = cgra_cfg_g2f_cfg_rd_en[i][0]; + g2c_cfg_rd_addr[i] = cgra_cfg_g2f_cfg_addr[i][0]; + end + end + + // Data interface + // Note: Connect g2f to [0] column. Connect f2g to [1] column. 
+ always_comb begin + for (int i = 0; i < NUM_PRR; i++) begin + g2c_io1[i] = strm_data_valid_g2f[i][0]; + g2c_io16[i] = strm_data_g2f[i][0]; + end + end + + always @ (*) begin + for (int i = 0; i < NUM_PRR; i++) begin + strm_data_valid_f2g[i][0] <= #4 0; + strm_data_valid_f2g[i][1] <= #4 c2g_io1[i]; + strm_data_f2g[i][0] <= #4 0; + strm_data_f2g[i][1] <= #4 c2g_io16[i]; + end + end + +endmodule From 7d4cbcbf45a0614bc633ff1be21c5d8daa994278 Mon Sep 17 00:00:00 2001 From: steveri Date: Fri, 16 Dec 2022 07:41:08 -0800 Subject: [PATCH 50/63] final tweaks on global_buffer_parameter.py --- global_buffer/design/global_buffer_parameter.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/global_buffer/design/global_buffer_parameter.py b/global_buffer/design/global_buffer_parameter.py index 3a6f846932..57349eec03 100644 --- a/global_buffer/design/global_buffer_parameter.py +++ b/global_buffer/design/global_buffer_parameter.py @@ -84,6 +84,8 @@ def sram_macro_depth(self): cgra_axi_data_width: int = 32 cgra_cfg_addr_width: int = 32 cgra_cfg_data_width: int = 32 + + # Not used by TSMC (yet) load_dma_fifo_depth: int = 16 store_dma_fifo_depth: int = 4 max_num_chain: int = 8 @@ -95,7 +97,7 @@ def sram_macro_depth(self): tsmc_sram_macro_prefix: str = "TS1N16FFCLLSBLVTC2048X64M8SW" gf_sram_macro_prefix: str = "IN12LP_S1DB_" - # constant variables + # constant variables: not used by TSMC (yet) st_dma_valid_mode_valid: int = 0 st_dma_valid_mode_ready_valid: int = 1 st_dma_valid_mode_static: int = 2 @@ -160,13 +162,16 @@ def sram_macro_depth(self): + sram_macro_read_latency ) + # Not used by TSMC (yet) flush_crossbar_pipeline_depth: int = 1 + rd_clk_en_margin: int = 3 wr_clk_en_margin: int = 3 proc_clk_en_margin: int = 4 is_sram_stub: int = 0 + # Not used by TSMC (yet) config_port_pipeline_depth: int = 1 # cycle count data width @@ -175,6 +180,9 @@ def sram_macro_depth(self): # interrupt cnt interrupt_cnt: int = 5 + # TSMC-specific tweaks + if 
os.getenv('WHICH_SOC') == "amber": + process = "TSMC" def gen_global_buffer_params(**kwargs): # User-defined parameters @@ -189,6 +197,9 @@ def gen_global_buffer_params(**kwargs): cfg_addr_width = kwargs.pop('cfg_addr_width', 32) cfg_data_width = kwargs.pop('cfg_data_width', 32) is_sram_stub = kwargs.pop('is_sram_stub', 0) + + # config_port_pipeline not used by TSMC (yet) + # pop() returns True if config_port_pipeline not exists config_port_pipeline = kwargs.pop('config_port_pipeline', True) if config_port_pipeline is True: From c0ca0a8613675dd3a939c2c849d9e5c09c21c258 Mon Sep 17 00:00:00 2001 From: steveri Date: Sat, 24 Dec 2022 11:14:46 -0800 Subject: [PATCH 51/63] unified amber/onyx global_buffer.py --- global_buffer/design/global_buffer.py | 352 ++++++++++++++++++++++---- 1 file changed, 307 insertions(+), 45 deletions(-) diff --git a/global_buffer/design/global_buffer.py b/global_buffer/design/global_buffer.py index 07988fbe30..2cab184d64 100644 --- a/global_buffer/design/global_buffer.py +++ b/global_buffer/design/global_buffer.py @@ -1,3 +1,4 @@ +import os from kratos import Generator, always_ff, posedge, always_comb, clock_en, clog2, const, concat, resize from kratos.util import to_magma from global_buffer.design.glb_tile import GlbTile @@ -23,8 +24,11 @@ def __init__(self, _params: GlobalBufferParams): self.flush_crossbar_sel = self.input("flush_crossbar_sel", clog2( self._params.num_glb_tiles) * self._params.num_groups) self.reset = self.reset("reset") - self.cgra_stall_in = self.input("cgra_stall_in", self._params.num_cgra_cols) - self.cgra_stall = self.output("cgra_stall", self._params.num_cgra_cols) + if os.getenv('WHICH_SOC') == "amber": + pass + else: + self.cgra_stall_in = self.input("cgra_stall_in", self._params.num_cgra_cols) + self.cgra_stall = self.output("cgra_stall", self._params.num_cgra_cols) self.proc_wr_en = self.input("proc_wr_en", 1) self.proc_wr_strb = self.input("proc_wr_strb", self._params.bank_strb_width) @@ -60,21 +64,32 @@ def 
__init__(self, _params: GlobalBufferParams): self.strm_data_f2g = self.input("strm_data_f2g", self._params.cgra_data_width, size=[ self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) - self.strm_data_f2g_vld = self.input("strm_data_f2g_vld", 1, size=[ - self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) - self.strm_data_f2g_rdy = self.output("strm_data_f2g_rdy", 1, size=[ - self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) - self.strm_ctrl_f2g = self.input("strm_ctrl_f2g", 1, size=[ - self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) + + if os.getenv('WHICH_SOC') == "amber": + self.strm_data_valid_f2g = self.input("strm_data_valid_f2g", 1, size=[ + self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) + else: + self.strm_data_f2g_vld = self.input("strm_data_f2g_vld", 1, size=[ + self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) + self.strm_data_f2g_rdy = self.output("strm_data_f2g_rdy", 1, size=[ + self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) + self.strm_ctrl_f2g = self.input("strm_ctrl_f2g", 1, size=[ + self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) self.strm_data_g2f = self.output("strm_data_g2f", self._params.cgra_data_width, size=[ self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) - self.strm_data_g2f_vld = self.output("strm_data_g2f_vld", 1, size=[ - self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) - self.strm_data_g2f_rdy = self.input("strm_data_g2f_rdy", 1, size=[ - self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) - self.strm_ctrl_g2f = self.output("strm_ctrl_g2f", 1, size=[ - self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) + + if os.getenv('WHICH_SOC') == "amber": + self.strm_data_valid_g2f = self.output("strm_data_valid_g2f", 1, size=[ + self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) + else: + 
self.strm_data_g2f_vld = self.output("strm_data_g2f_vld", 1, size=[ + self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) + self.strm_data_g2f_rdy = self.input("strm_data_g2f_rdy", 1, size=[ + self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) + self.strm_ctrl_g2f = self.output("strm_ctrl_g2f", 1, size=[ + self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) + self.strm_data_flush_g2f = self.output("strm_data_flush_g2f", 1, size=self._params.num_groups, packed=True) self.cgra_cfg_g2f_cfg_wr_en = self.output("cgra_cfg_g2f_cfg_wr_en", 1, size=[ @@ -98,8 +113,12 @@ def __init__(self, _params: GlobalBufferParams): self.bank_msb_data_width = self._params.bank_data_width - self._params.axi_data_width # local variables - self.data_flush = self.var("data_flush", self._params.num_glb_tiles) - self.data_flush_d = self.var("data_flush_d", self._params.num_glb_tiles) + if os.getenv('WHICH_SOC') == "amber": + self.data_flush = self.var("data_flush", 1, size=self._params.num_glb_tiles, packed=True) + else: + self.data_flush = self.var("data_flush", self._params.num_glb_tiles) + self.data_flush_d = self.var("data_flush_d", self._params.num_glb_tiles) + self.proc_rd_type_e = self.enum("proc_rd_type_e", {"axi": 0, "jtag": 1}) self.proc_rd_type = self.var("proc_rd_type", self.proc_rd_type_e) self.proc_rd_addr_sel = self.var("proc_rd_addr_sel", 1) @@ -109,8 +128,11 @@ def __init__(self, _params: GlobalBufferParams): self.proc_wr_data_d = self.var("proc_wr_data_d", self._params.bank_data_width) self.proc_rd_en_d = self.var("proc_rd_en_d", 1) self.proc_rd_addr_d = self.var("proc_rd_addr_d", self._params.glb_addr_width) - self.proc_rd_data_w = self.var("proc_rd_data_w", self._params.bank_data_width) - self.proc_rd_data_valid_w = self.var("proc_rd_data_valid_w", 1) + if os.getenv('WHICH_SOC') == "amber": + pass + else: + self.proc_rd_data_w = self.var("proc_rd_data_w", self._params.bank_data_width) + self.proc_rd_data_valid_w = 
self.var("proc_rd_data_valid_w", 1) self.sram_cfg_wr_en_d = self.var("sram_cfg_wr_en_d", 1) self.sram_cfg_wr_strb_d = self.var("sram_cfg_wr_strb_d", self._params.bank_strb_width) @@ -118,8 +140,11 @@ def __init__(self, _params: GlobalBufferParams): self.sram_cfg_wr_data_d = self.var("sram_cfg_wr_data_d", self._params.bank_data_width) self.sram_cfg_rd_en_d = self.var("sram_cfg_rd_en_d", 1) self.sram_cfg_rd_addr_d = self.var("sram_cfg_rd_addr_d", self._params.glb_addr_width) - self.if_sram_cfg_rd_data_w = self.var("if_sram_cfg_rd_data_w", self._params.axi_data_width) - self.if_sram_cfg_rd_data_valid_w = self.var("if_sram_cfg_rd_data_valid_w", 1) + if os.getenv('WHICH_SOC') == "amber": + pass + else: + self.if_sram_cfg_rd_data_w = self.var("if_sram_cfg_rd_data_w", self._params.axi_data_width) + self.if_sram_cfg_rd_data_valid_w = self.var("if_sram_cfg_rd_data_valid_w", 1) self.cgra_cfg_jtag_gc2glb_wr_en_d = self.var("cgra_cfg_jtag_gc2glb_wr_en_d", 1) self.cgra_cfg_jtag_gc2glb_rd_en_d = self.var("cgra_cfg_jtag_gc2glb_rd_en_d", 1) @@ -245,8 +270,25 @@ def __init__(self, _params: GlobalBufferParams): self.if_sram_cfg_list.append(self.interface( if_sram_cfg_tile2tile, f"if_sram_cfg_tile2tile_{i}")) - # Passthrough cgar_stall signals - self.wire(self.cgra_stall_in, self.cgra_stall) + if os.getenv('WHICH_SOC') == "amber": + # GLS pipeline + self.strm_g2f_start_pulse_d = self.var("strm_g2f_start_pulse_d", self._params.num_glb_tiles) + self.strm_f2g_start_pulse_d = self.var("strm_f2g_start_pulse_d", self._params.num_glb_tiles) + self.pcfg_start_pulse_d = self.var("pcfg_start_pulse_d", self._params.num_glb_tiles) + self.gls_in = concat(self.strm_g2f_start_pulse, self.strm_f2g_start_pulse, self.pcfg_start_pulse) + self.gls_out = concat(self.strm_g2f_start_pulse_d, self.strm_f2g_start_pulse_d, self.pcfg_start_pulse_d) + + self.gls_pipeline = Pipeline(width=self.gls_in.width, depth=self._params.gls_pipeline_depth) + self.add_child("gls_pipeline", + self.gls_pipeline, + 
clk=self.clk, + clk_en=const(1, 1), + reset=self.reset, + in_=self.gls_in, + out_=self.gls_out) + else: + # Passthrough cgar_stall signals + self.wire(self.cgra_stall_in, self.cgra_stall) # GLB Tiles self.glb_tile = [] @@ -265,8 +307,13 @@ def __init__(self, _params: GlobalBufferParams): self.add_always(self.sram_cfg_pipeline) self.add_always(self.left_edge_proc_wr_ff) self.add_always(self.left_edge_proc_rd_in_ff) - self.add_always(self.left_edge_proc_rd_out_logic) - self.add_always(self.left_edge_proc_rd_out_ff) + if os.getenv('WHICH_SOC') == "amber": + # self.add_always(self.left_edge_proc_rd_ff) => name changed to 'left_edge_proc_rd_in_ff' + self.add_always(self.left_edge_proc_rd_out) + else: + self.add_always(self.left_edge_proc_rd_out_logic) + self.add_always(self.left_edge_proc_rd_out_ff) + self.add_proc_clk_en() self.add_always(self.left_edge_cfg_ff) self.add_always(self.left_edge_cgra_cfg_ff) @@ -280,21 +327,28 @@ def __init__(self, _params: GlobalBufferParams): self.wire(self.if_cfg_rd_data, self.if_cfg_list[0].rd_data) self.wire(self.if_cfg_rd_data_valid, self.if_cfg_list[0].rd_data_valid) - # Add flush signal pipeline - self.flush_pipeline = Pipeline(width=self.data_flush.width, - depth=self._params.flush_crossbar_pipeline_depth) - self.add_child("flush_pipeline", - self.flush_pipeline, - clk=self.clk, - clk_en=const(1, 1), - reset=self.reset, - in_=self.data_flush, - out_=self.data_flush_d) + if os.getenv('WHICH_SOC') == "amber": + pass + else: + # Add flush signal pipeline + self.flush_pipeline = Pipeline(width=self.data_flush.width, + depth=self._params.flush_crossbar_pipeline_depth) + self.add_child("flush_pipeline", + self.flush_pipeline, + clk=self.clk, + clk_en=const(1, 1), + reset=self.reset, + in_=self.data_flush, + out_=self.data_flush_d) # Add flush signal crossbar - flush_crossbar_in = self.var("flush_crossbar_in", 1, size=self._params.num_glb_tiles, packed=True) - for i in range(self._params.num_glb_tiles): - self.wire(flush_crossbar_in[i], 
self.data_flush_d[i]) + if os.getenv('WHICH_SOC') == "amber": + pass + else: + flush_crossbar_in = self.var("flush_crossbar_in", 1, size=self._params.num_glb_tiles, packed=True) + for i in range(self._params.num_glb_tiles): + self.wire(flush_crossbar_in[i], self.data_flush_d[i]) + self.flush_crossbar = GlbCrossbar(width=1, num_input=self._params.num_glb_tiles, num_output=self._params.num_groups) self.flush_crossbar_sel_w = self.var("flush_crossbar_sel_w", clog2(self._params.num_glb_tiles), @@ -303,11 +357,18 @@ def __init__(self, _params: GlobalBufferParams): self.wire(self.flush_crossbar_sel_w[i], self.flush_crossbar_sel[(i + 1) * clog2(self._params.num_glb_tiles) - 1, i * clog2(self._params.num_glb_tiles)]) - self.add_child("flush_crossbar", - self.flush_crossbar, - in_=flush_crossbar_in, - sel_=self.flush_crossbar_sel_w, - out_=self.strm_data_flush_g2f) + if os.getenv('WHICH_SOC') == "amber": + self.add_child("flush_crossbar", + self.flush_crossbar, + in_=self.data_flush, + sel_=self.flush_crossbar_sel_w, + out_=self.strm_data_flush_g2f) + else: + self.add_child("flush_crossbar", + self.flush_crossbar, + in_=flush_crossbar_in, + sel_=self.flush_crossbar_sel_w, + out_=self.strm_data_flush_g2f) @always_ff((posedge, "clk"), (posedge, "reset")) def proc_pipeline(self): @@ -366,8 +427,10 @@ def add_proc_clk_en(self): self.wire(self.if_proc_list[0].wr_clk_en, self.proc_wr_clk_en) self.rd_clk_en_gen = GlbClkEnGen(cnt=2 * self._params.num_glb_tiles + self._params.tile2sram_rd_delay + self._params.proc_clk_en_margin) - self.rd_clk_en_gen.p_cnt.value = 2 * self._params.num_glb_tiles + \ - self._params.tile2sram_rd_delay + self._params.proc_clk_en_margin + if os.getenv('WHICH_SOC') == "amber": pass + else: + self.rd_clk_en_gen.p_cnt.value = 2 * self._params.num_glb_tiles + \ + self._params.tile2sram_rd_delay + self._params.proc_clk_en_margin self.proc_rd_clk_en = self.var("proc_rd_clk_en", 1) self.add_child("proc_rd_clk_en_gen", self.rd_clk_en_gen, @@ -426,6 +489,7 @@ def 
left_edge_proc_rd_in_ff(self): self.proc_rd_type = self.proc_rd_type self.proc_rd_addr_sel = self.proc_rd_addr_sel + # Only used by onyx @always_comb def left_edge_proc_rd_out_logic(self): if self.proc_rd_type == self.proc_rd_type_e.axi: @@ -448,6 +512,7 @@ def left_edge_proc_rd_out_logic(self): self.if_sram_cfg_rd_data_w = 0 self.if_sram_cfg_rd_data_valid_w = 0 + # Only used by onyx @always_ff((posedge, "clk"), (posedge, "reset")) def left_edge_proc_rd_out_ff(self): if self.reset: @@ -461,6 +526,30 @@ def left_edge_proc_rd_out_ff(self): self.if_sram_cfg_rd_data = self.if_sram_cfg_rd_data_w self.if_sram_cfg_rd_data_valid = self.if_sram_cfg_rd_data_valid_w + # amber uses a different version + if os.getenv('WHICH_SOC') == "amber": + @ always_comb + def left_edge_proc_rd_out(self): + if self.proc_rd_type == self.proc_rd_type_e.axi: + self.proc_rd_data = self.if_proc_list[0].rd_data + self.proc_rd_data_valid = self.if_proc_list[0].rd_data_valid + self.if_sram_cfg_rd_data = 0 + self.if_sram_cfg_rd_data_valid = 0 + elif self.proc_rd_type == self.proc_rd_type_e.jtag: + self.proc_rd_data = 0 + self.proc_rd_data_valid = 0 + if self.proc_rd_addr_sel == 0: + self.if_sram_cfg_rd_data = self.if_proc_list[0].rd_data[self._params.axi_data_width - 1, 0] + else: + self.if_sram_cfg_rd_data = self.if_proc_list[0].rd_data[self._params.axi_data_width + * 2 - 1, self._params.axi_data_width] + self.if_sram_cfg_rd_data_valid = self.if_proc_list[0].rd_data_valid + else: + self.proc_rd_data = self.if_proc_list[0].rd_data + self.proc_rd_data_valid = self.if_proc_list[0].rd_data_valid + self.if_sram_cfg_rd_data = 0 + self.if_sram_cfg_rd_data_valid = 0 + @ always_ff((posedge, "clk"), (posedge, "reset")) def left_edge_cfg_ff(self): if self.reset: @@ -552,7 +641,180 @@ def tile2tile_w2e_cfg_wiring(self): self.cgra_cfg_pcfg_addr_w2e_wsti[i] = self.cgra_cfg_pcfg_addr_w2e_esto[i - 1] self.cgra_cfg_pcfg_data_w2e_wsti[i] = self.cgra_cfg_pcfg_data_w2e_esto[i - 1] - def add_glb_tile(self): + if 
os.getenv('WHICH_SOC') == "amber": + def add_glb_tile(self): + for i in range(self._params.num_glb_tiles): + self.add_child(f"glb_tile_gen_{i}", + self.glb_tile[i], + clk=self.clk, + clk_en_pcfg_broadcast=clock_en(~self.pcfg_broadcast_stall[i]), + clk_en_master=clock_en(self.glb_clk_en_master[i]), + clk_en_bank_master=clock_en(self.glb_clk_en_bank_master[i]), + reset=self.reset, + glb_tile_id=i, + + # proc + if_proc_est_m_wr_en=self.if_proc_list[i + 1].wr_en, + if_proc_est_m_wr_clk_en=self.if_proc_list[i + 1].wr_clk_en, + if_proc_est_m_wr_addr=self.if_proc_list[i + 1].wr_addr, + if_proc_est_m_wr_data=self.if_proc_list[i + 1].wr_data, + if_proc_est_m_wr_strb=self.if_proc_list[i + 1].wr_strb, + if_proc_est_m_rd_en=self.if_proc_list[i + 1].rd_en, + if_proc_est_m_rd_clk_en=self.if_proc_list[i + 1].rd_clk_en, + if_proc_est_m_rd_addr=self.if_proc_list[i + 1].rd_addr, + if_proc_est_m_rd_data=self.if_proc_list[i + 1].rd_data, + if_proc_est_m_rd_data_valid=self.if_proc_list[i + 1].rd_data_valid, + + if_proc_wst_s_wr_en=self.if_proc_list[i].wr_en, + if_proc_wst_s_wr_clk_en=self.if_proc_list[i].wr_clk_en, + if_proc_wst_s_wr_addr=self.if_proc_list[i].wr_addr, + if_proc_wst_s_wr_data=self.if_proc_list[i].wr_data, + if_proc_wst_s_wr_strb=self.if_proc_list[i].wr_strb, + if_proc_wst_s_rd_en=self.if_proc_list[i].rd_en, + if_proc_wst_s_rd_clk_en=self.if_proc_list[i].rd_clk_en, + if_proc_wst_s_rd_addr=self.if_proc_list[i].rd_addr, + if_proc_wst_s_rd_data=self.if_proc_list[i].rd_data, + if_proc_wst_s_rd_data_valid=self.if_proc_list[i].rd_data_valid, + + # strm + strm_wr_en_e2w_esti=self.strm_packet_e2w_esti[i]["wr"]['wr_en'], + strm_wr_strb_e2w_esti=self.strm_packet_e2w_esti[i]["wr"]['wr_strb'], + strm_wr_addr_e2w_esti=self.strm_packet_e2w_esti[i]["wr"]['wr_addr'], + strm_wr_data_e2w_esti=self.strm_packet_e2w_esti[i]["wr"]['wr_data'], + strm_rd_en_e2w_esti=self.strm_packet_e2w_esti[i]["rdrq"]['rd_en'], + strm_rd_addr_e2w_esti=self.strm_packet_e2w_esti[i]["rdrq"]['rd_addr'], + 
strm_rd_data_e2w_esti=self.strm_packet_e2w_esti[i]["rdrs"]['rd_data'], + strm_rd_data_valid_e2w_esti=self.strm_packet_e2w_esti[i]["rdrs"]['rd_data_valid'], + + strm_wr_en_w2e_esto=self.strm_packet_w2e_esto[i]["wr"]['wr_en'], + strm_wr_strb_w2e_esto=self.strm_packet_w2e_esto[i]["wr"]['wr_strb'], + strm_wr_addr_w2e_esto=self.strm_packet_w2e_esto[i]["wr"]['wr_addr'], + strm_wr_data_w2e_esto=self.strm_packet_w2e_esto[i]["wr"]['wr_data'], + strm_rd_en_w2e_esto=self.strm_packet_w2e_esto[i]["rdrq"]['rd_en'], + strm_rd_addr_w2e_esto=self.strm_packet_w2e_esto[i]["rdrq"]['rd_addr'], + strm_rd_data_w2e_esto=self.strm_packet_w2e_esto[i]["rdrs"]['rd_data'], + strm_rd_data_valid_w2e_esto=self.strm_packet_w2e_esto[i]["rdrs"]['rd_data_valid'], + + strm_wr_en_w2e_wsti=self.strm_packet_w2e_wsti[i]["wr"]['wr_en'], + strm_wr_strb_w2e_wsti=self.strm_packet_w2e_wsti[i]["wr"]['wr_strb'], + strm_wr_addr_w2e_wsti=self.strm_packet_w2e_wsti[i]["wr"]['wr_addr'], + strm_wr_data_w2e_wsti=self.strm_packet_w2e_wsti[i]["wr"]['wr_data'], + strm_rd_en_w2e_wsti=self.strm_packet_w2e_wsti[i]["rdrq"]['rd_en'], + strm_rd_addr_w2e_wsti=self.strm_packet_w2e_wsti[i]["rdrq"]['rd_addr'], + strm_rd_data_w2e_wsti=self.strm_packet_w2e_wsti[i]["rdrs"]['rd_data'], + strm_rd_data_valid_w2e_wsti=self.strm_packet_w2e_wsti[i]["rdrs"]['rd_data_valid'], + + strm_wr_en_e2w_wsto=self.strm_packet_e2w_wsto[i]["wr"]['wr_en'], + strm_wr_strb_e2w_wsto=self.strm_packet_e2w_wsto[i]["wr"]['wr_strb'], + strm_wr_addr_e2w_wsto=self.strm_packet_e2w_wsto[i]["wr"]['wr_addr'], + strm_wr_data_e2w_wsto=self.strm_packet_e2w_wsto[i]["wr"]['wr_data'], + strm_rd_en_e2w_wsto=self.strm_packet_e2w_wsto[i]["rdrq"]['rd_en'], + strm_rd_addr_e2w_wsto=self.strm_packet_e2w_wsto[i]["rdrq"]['rd_addr'], + strm_rd_data_e2w_wsto=self.strm_packet_e2w_wsto[i]["rdrs"]['rd_data'], + strm_rd_data_valid_e2w_wsto=self.strm_packet_e2w_wsto[i]["rdrs"]['rd_data_valid'], + + # pcfg + pcfg_rd_en_e2w_esti=self.pcfg_packet_e2w_esti[i]["rdrq"]['rd_en'], + 
pcfg_rd_addr_e2w_esti=self.pcfg_packet_e2w_esti[i]["rdrq"]['rd_addr'], + pcfg_rd_data_e2w_esti=self.pcfg_packet_e2w_esti[i]["rdrs"]['rd_data'], + pcfg_rd_data_valid_e2w_esti=self.pcfg_packet_e2w_esti[i]["rdrs"]['rd_data_valid'], + + pcfg_rd_en_w2e_esto=self.pcfg_packet_w2e_esto[i]["rdrq"]['rd_en'], + pcfg_rd_addr_w2e_esto=self.pcfg_packet_w2e_esto[i]["rdrq"]['rd_addr'], + pcfg_rd_data_w2e_esto=self.pcfg_packet_w2e_esto[i]["rdrs"]['rd_data'], + pcfg_rd_data_valid_w2e_esto=self.pcfg_packet_w2e_esto[i]["rdrs"]['rd_data_valid'], + + pcfg_rd_en_w2e_wsti=self.pcfg_packet_w2e_wsti[i]["rdrq"]['rd_en'], + pcfg_rd_addr_w2e_wsti=self.pcfg_packet_w2e_wsti[i]["rdrq"]['rd_addr'], + pcfg_rd_data_w2e_wsti=self.pcfg_packet_w2e_wsti[i]["rdrs"]['rd_data'], + pcfg_rd_data_valid_w2e_wsti=self.pcfg_packet_w2e_wsti[i]["rdrs"]['rd_data_valid'], + + pcfg_rd_en_e2w_wsto=self.pcfg_packet_e2w_wsto[i]["rdrq"]['rd_en'], + pcfg_rd_addr_e2w_wsto=self.pcfg_packet_e2w_wsto[i]["rdrq"]['rd_addr'], + pcfg_rd_data_e2w_wsto=self.pcfg_packet_e2w_wsto[i]["rdrs"]['rd_data'], + pcfg_rd_data_valid_e2w_wsto=self.pcfg_packet_e2w_wsto[i]["rdrs"]['rd_data_valid'], + + # cfg + if_cfg_est_m_wr_en=self.if_cfg_list[i + 1].wr_en, + if_cfg_est_m_wr_clk_en=self.if_cfg_list[i + 1].wr_clk_en, + if_cfg_est_m_wr_addr=self.if_cfg_list[i + 1].wr_addr, + if_cfg_est_m_wr_data=self.if_cfg_list[i + 1].wr_data, + if_cfg_est_m_rd_en=self.if_cfg_list[i + 1].rd_en, + if_cfg_est_m_rd_clk_en=self.if_cfg_list[i + 1].rd_clk_en, + if_cfg_est_m_rd_addr=self.if_cfg_list[i + 1].rd_addr, + if_cfg_est_m_rd_data=self.if_cfg_list[i + 1].rd_data, + if_cfg_est_m_rd_data_valid=self.if_cfg_list[i + 1].rd_data_valid, + + if_cfg_wst_s_wr_en=self.if_cfg_list[i].wr_en, + if_cfg_wst_s_wr_clk_en=self.if_cfg_list[i].wr_clk_en, + if_cfg_wst_s_wr_addr=self.if_cfg_list[i].wr_addr, + if_cfg_wst_s_wr_data=self.if_cfg_list[i].wr_data, + if_cfg_wst_s_rd_en=self.if_cfg_list[i].rd_en, + if_cfg_wst_s_rd_clk_en=self.if_cfg_list[i].rd_clk_en, + 
if_cfg_wst_s_rd_addr=self.if_cfg_list[i].rd_addr, + if_cfg_wst_s_rd_data=self.if_cfg_list[i].rd_data, + if_cfg_wst_s_rd_data_valid=self.if_cfg_list[i].rd_data_valid, + + cfg_tile_connected_wsti=self.cfg_tile_connected[i], + cfg_tile_connected_esto=self.cfg_tile_connected[i + 1], + cfg_pcfg_tile_connected_wsti=self.cfg_pcfg_tile_connected[i], + cfg_pcfg_tile_connected_esto=self.cfg_pcfg_tile_connected[i + 1], + + strm_data_f2g=self.strm_data_f2g[i], + strm_data_valid_f2g=self.strm_data_valid_f2g[i], + strm_data_g2f=self.strm_data_g2f[i], + strm_data_valid_g2f=self.strm_data_valid_g2f[i], + data_flush=self.data_flush[i], + + cgra_cfg_g2f_cfg_wr_en=self.cgra_cfg_g2f_cfg_wr_en[i], + cgra_cfg_g2f_cfg_rd_en=self.cgra_cfg_g2f_cfg_rd_en[i], + cgra_cfg_g2f_cfg_addr=self.cgra_cfg_g2f_cfg_addr[i], + cgra_cfg_g2f_cfg_data=self.cgra_cfg_g2f_cfg_data[i], + + cgra_cfg_pcfg_wr_en_w2e_wsti=self.cgra_cfg_pcfg_wr_en_w2e_wsti[i], + cgra_cfg_pcfg_rd_en_w2e_wsti=self.cgra_cfg_pcfg_rd_en_w2e_wsti[i], + cgra_cfg_pcfg_addr_w2e_wsti=self.cgra_cfg_pcfg_addr_w2e_wsti[i], + cgra_cfg_pcfg_data_w2e_wsti=self.cgra_cfg_pcfg_data_w2e_wsti[i], + + cgra_cfg_pcfg_wr_en_w2e_esto=self.cgra_cfg_pcfg_wr_en_w2e_esto[i], + cgra_cfg_pcfg_rd_en_w2e_esto=self.cgra_cfg_pcfg_rd_en_w2e_esto[i], + cgra_cfg_pcfg_addr_w2e_esto=self.cgra_cfg_pcfg_addr_w2e_esto[i], + cgra_cfg_pcfg_data_w2e_esto=self.cgra_cfg_pcfg_data_w2e_esto[i], + + cgra_cfg_pcfg_wr_en_e2w_esti=self.cgra_cfg_pcfg_wr_en_e2w_esti[i], + cgra_cfg_pcfg_rd_en_e2w_esti=self.cgra_cfg_pcfg_rd_en_e2w_esti[i], + cgra_cfg_pcfg_addr_e2w_esti=self.cgra_cfg_pcfg_addr_e2w_esti[i], + cgra_cfg_pcfg_data_e2w_esti=self.cgra_cfg_pcfg_data_e2w_esti[i], + + cgra_cfg_pcfg_wr_en_e2w_wsto=self.cgra_cfg_pcfg_wr_en_e2w_wsto[i], + cgra_cfg_pcfg_rd_en_e2w_wsto=self.cgra_cfg_pcfg_rd_en_e2w_wsto[i], + cgra_cfg_pcfg_addr_e2w_wsto=self.cgra_cfg_pcfg_addr_e2w_wsto[i], + cgra_cfg_pcfg_data_e2w_wsto=self.cgra_cfg_pcfg_data_e2w_wsto[i], + + 
cgra_cfg_jtag_wr_en_wsti=self.cgra_cfg_jtag_wr_en_wsti[i], + cgra_cfg_jtag_rd_en_wsti=self.cgra_cfg_jtag_rd_en_wsti[i], + cgra_cfg_jtag_addr_wsti=self.cgra_cfg_jtag_addr_wsti[i], + cgra_cfg_jtag_data_wsti=self.cgra_cfg_jtag_data_wsti[i], + + cgra_cfg_jtag_wr_en_esto=self.cgra_cfg_jtag_wr_en_esto[i], + cgra_cfg_jtag_rd_en_esto=self.cgra_cfg_jtag_rd_en_esto[i], + cgra_cfg_jtag_addr_esto=self.cgra_cfg_jtag_addr_esto[i], + cgra_cfg_jtag_data_esto=self.cgra_cfg_jtag_data_esto[i], + + cgra_cfg_jtag_rd_en_bypass_wsti=self.cgra_cfg_jtag_rd_en_bypass_wsti[i], + cgra_cfg_jtag_addr_bypass_wsti=self.cgra_cfg_jtag_addr_bypass_wsti[i], + cgra_cfg_jtag_rd_en_bypass_esto=self.cgra_cfg_jtag_rd_en_bypass_esto[i], + cgra_cfg_jtag_addr_bypass_esto=self.cgra_cfg_jtag_addr_bypass_esto[i], + + strm_g2f_start_pulse=self.strm_g2f_start_pulse_d[i], + strm_f2g_start_pulse=self.strm_f2g_start_pulse_d[i], + pcfg_start_pulse=self.pcfg_start_pulse_d[i], + strm_f2g_interrupt_pulse=self.strm_f2g_interrupt_pulse_w[i], + strm_g2f_interrupt_pulse=self.strm_g2f_interrupt_pulse_w[i], + pcfg_g2f_interrupt_pulse=self.pcfg_g2f_interrupt_pulse_w[i]) + + else: + def add_glb_tile(self): for i in range(self._params.num_glb_tiles): self.add_child(f"glb_tile_gen_{i}", self.glb_tile[i], From b2a30539a27fab0f7fe31a932f16e44be91552d2 Mon Sep 17 00:00:00 2001 From: steveri Date: Sat, 24 Dec 2022 12:21:39 -0800 Subject: [PATCH 52/63] unified amber/onyx gen_global_buffer_rdl.py --- global_buffer/gen_global_buffer_rdl.py | 62 ++++++++++++++++++++------ 1 file changed, 48 insertions(+), 14 deletions(-) diff --git a/global_buffer/gen_global_buffer_rdl.py b/global_buffer/gen_global_buffer_rdl.py index 3e84773299..1c4bce8a06 100644 --- a/global_buffer/gen_global_buffer_rdl.py +++ b/global_buffer/gen_global_buffer_rdl.py @@ -184,18 +184,27 @@ def gen_global_buffer_rdl(name, params: GlobalBufferParams): st_dma_ctrl_r = Reg("st_dma_ctrl") st_dma_mode_f = Field("mode", 2) st_dma_ctrl_r.add_child(st_dma_mode_f) - 
st_dma_valid_mode_f = Field("valid_mode", 2) - st_dma_ctrl_r.add_child(st_dma_valid_mode_f) + + if os.getenv('WHICH_SOC') == "amber": + st_dma_use_valid_f = Field("use_valid", 1) + st_dma_ctrl_r.add_child(st_dma_use_valid_f) + else: + st_dma_valid_mode_f = Field("valid_mode", 2) + st_dma_ctrl_r.add_child(st_dma_valid_mode_f) + st_dma_data_mux_f = Field("data_mux", 2) st_dma_ctrl_r.add_child(st_dma_data_mux_f) st_dma_num_repeat_f = Field("num_repeat", clog2(params.queue_depth) + 1) st_dma_ctrl_r.add_child(st_dma_num_repeat_f) addr_map.add_child(st_dma_ctrl_r) - st_dma_num_blocks_r = Reg("st_dma_num_blocks") - st_dma_num_blocks_f = Field("value", params.axi_data_width) - st_dma_num_blocks_r.add_child(st_dma_num_blocks_f) - addr_map.add_child(st_dma_num_blocks_r) + if os.getenv('WHICH_SOC') == "amber": + pass + else: + st_dma_num_blocks_r = Reg("st_dma_num_blocks") + st_dma_num_blocks_f = Field("value", params.axi_data_width) + st_dma_num_blocks_r.add_child(st_dma_num_blocks_f) + addr_map.add_child(st_dma_num_blocks_r) # Store DMA Header if params.queue_depth == 1: @@ -205,7 +214,10 @@ def gen_global_buffer_rdl(name, params: GlobalBufferParams): # dim reg dim_r = Reg(f"dim") - dim_f = Field(f"dim", width=clog2(params.store_dma_loop_level) + 1) + if os.getenv('WHICH_SOC') == "amber": + dim_f = Field(f"dim", width=clog2(params.loop_level) + 1) + else: + dim_f = Field(f"dim", width=clog2(params.store_dma_loop_level) + 1) dim_r.add_child(dim_f) st_dma_header_rf.add_child(dim_r) @@ -222,7 +234,12 @@ def gen_global_buffer_rdl(name, params: GlobalBufferParams): st_dma_header_rf.add_child(cycle_start_addr_r) # num_word reg - for i in range(params.store_dma_loop_level): + if os.getenv('WHICH_SOC') == "amber": + LL = params.loop_level + else: + LL = params.store_dma_loop_level + + for i in range(LL): range_r = Reg(f"range_{i}") range_f = Field("range", width=params.axi_data_width) range_r.add_child(range_f) @@ -242,10 +259,18 @@ def gen_global_buffer_rdl(name, params: 
GlobalBufferParams): ld_dma_ctrl_r = Reg("ld_dma_ctrl") ld_dma_mode_f = Field("mode", 2) ld_dma_ctrl_r.add_child(ld_dma_mode_f) - ld_dma_valid_mode_f = Field("valid_mode", 2) - ld_dma_ctrl_r.add_child(ld_dma_valid_mode_f) - ld_dma_flush_mode_f = Field("flush_mode", 1) - ld_dma_ctrl_r.add_child(ld_dma_flush_mode_f) + + if os.getenv('WHICH_SOC') == "amber": + ld_dma_use_valid_f = Field("use_valid", 1) + ld_dma_ctrl_r.add_child(ld_dma_use_valid_f) + ld_dma_use_flush_f = Field("use_flush", 1) + ld_dma_ctrl_r.add_child(ld_dma_use_flush_f) + else: + ld_dma_valid_mode_f = Field("valid_mode", 2) + ld_dma_ctrl_r.add_child(ld_dma_valid_mode_f) + ld_dma_flush_mode_f = Field("flush_mode", 1) + ld_dma_ctrl_r.add_child(ld_dma_flush_mode_f) + ld_dma_data_mux_f = Field("data_mux", 2) ld_dma_ctrl_r.add_child(ld_dma_data_mux_f) ld_dma_num_repeat_f = Field("num_repeat", clog2(params.queue_depth) + 1) @@ -265,7 +290,11 @@ def gen_global_buffer_rdl(name, params: GlobalBufferParams): # dim reg dim_r = Reg(f"dim") - dim_f = Field(f"dim", width=clog2(params.load_dma_loop_level) + 1) + if os.getenv('WHICH_SOC') == "amber": + dim_f = Field(f"dim", width=clog2(params.loop_level) + 1) + else: + dim_f = Field(f"dim", width=clog2(params.load_dma_loop_level) + 1) + dim_r.add_child(dim_f) ld_dma_header_rf.add_child(dim_r) @@ -282,7 +311,12 @@ def gen_global_buffer_rdl(name, params: GlobalBufferParams): ld_dma_header_rf.add_child(cycle_start_addr_r) # num_word reg - for i in range(params.load_dma_loop_level): + if os.getenv('WHICH_SOC') == "amber": + LL = params.loop_level + else: + LL = params.load_dma_loop_level + + for i in range(LL): range_r = Reg(f"range_{i}") range_f = Field("range", width=params.axi_data_width) range_r.add_child(range_f) From 1435267af2329440dbc66efb908b26553b844684 Mon Sep 17 00:00:00 2001 From: steveri Date: Thu, 29 Dec 2022 16:23:16 -0800 Subject: [PATCH 53/63] merged design/glb_addr_gen.py --- global_buffer/design/glb_addr_gen.py | 14 +++++++++++--- 1 file changed, 11 
insertions(+), 3 deletions(-) diff --git a/global_buffer/design/glb_addr_gen.py b/global_buffer/design/glb_addr_gen.py index dc53b61d0e..407b83ceb1 100644 --- a/global_buffer/design/glb_addr_gen.py +++ b/global_buffer/design/glb_addr_gen.py @@ -1,3 +1,4 @@ +import os from kratos import always_ff, posedge, Generator, clog2 from global_buffer.design.global_buffer_parameter import GlobalBufferParams @@ -6,11 +7,18 @@ class GlbAddrGen(Generator): ''' Generate addresses ''' def __init__(self, _params: GlobalBufferParams, loop_level: int): - super().__init__(f"glb_addr_gen_{loop_level}") + if os.getenv('WHICH_SOC') == "amber": + super().__init__(f"glb_addr_gen") + else: + super().__init__(f"glb_addr_gen_{loop_level}") self._params = _params self.p_addr_width = self.param("addr_width", width=32, value=32) - self.p_loop_level = self.param("loop_level", width=32, value=self._params.loop_level) - self.loop_level = loop_level + if os.getenv('WHICH_SOC') == "amber": + self.p_loop_level = self._params.loop_level + self.loop_level = self._params.loop_level + else: + self.p_loop_level = self.param("loop_level", width=32, value=self._params.loop_level) + self.loop_level = loop_level self.clk = self.clock("clk") self.clk_en = self.clock_en("clk_en") From f213b69f1f6d9ab7acdd3e732c9ac78c1ab1610e Mon Sep 17 00:00:00 2001 From: steveri Date: Thu, 29 Dec 2022 17:01:50 -0800 Subject: [PATCH 54/63] merged glb_header.py --- global_buffer/design/glb_header.py | 85 +++++++++++++++++++----------- 1 file changed, 55 insertions(+), 30 deletions(-) diff --git a/global_buffer/design/glb_header.py b/global_buffer/design/glb_header.py index cf10947803..686bb71ceb 100644 --- a/global_buffer/design/glb_header.py +++ b/global_buffer/design/glb_header.py @@ -1,3 +1,4 @@ +import os from kratos import PackedStruct, clog2, enum from global_buffer.design.global_buffer_parameter import GlobalBufferParams @@ -14,36 +15,60 @@ def __init__(self, _params: GlobalBufferParams): [("tile_connected", 1), 
("latency", self._params.pcfg_latency_width)]) - self.cfg_store_dma_ctrl_t = PackedStruct("store_dma_ctrl_t", - [("mode", 2), - ("valid_mode", 2), - ("data_mux", 2), - ("num_repeat", clog2(self._params.queue_depth) + 1)]) - - self.cfg_load_dma_ctrl_t = PackedStruct("load_dma_ctrl_t", - [("mode", 2), - ("valid_mode", 2), - ("flush_mode", 1), - ("data_mux", 2), - ("num_repeat", clog2(self._params.queue_depth) + 1)]) - - load_dma_header_struct_list = [("start_addr", self._params.glb_addr_width), - ("cycle_start_addr", self._params.cycle_count_width)] - load_dma_header_struct_list += [("dim", 1 + clog2(self._params.load_dma_loop_level))] - for i in range(self._params.load_dma_loop_level): - load_dma_header_struct_list += [(f"range_{i}", self._params.axi_data_width), - (f"stride_{i}", self._params.glb_addr_width + 1), - (f"cycle_stride_{i}", self._params.cycle_count_width)] - self.cfg_load_dma_header_t = PackedStruct("load_dma_header_t", load_dma_header_struct_list) - - store_dma_header_struct_list = [("start_addr", self._params.glb_addr_width), - ("cycle_start_addr", self._params.cycle_count_width)] - store_dma_header_struct_list += [("dim", 1 + clog2(self._params.store_dma_loop_level))] - for i in range(self._params.store_dma_loop_level): - store_dma_header_struct_list += [(f"range_{i}", self._params.axi_data_width), - (f"stride_{i}", self._params.glb_addr_width + 1), - (f"cycle_stride_{i}", self._params.cycle_count_width)] - self.cfg_store_dma_header_t = PackedStruct("store_dma_header_t", store_dma_header_struct_list) + if os.getenv('WHICH_SOC') == "amber": + self.cfg_store_dma_ctrl_t = PackedStruct("store_dma_ctrl_t", + [("mode", 2), + ("use_valid", 1), + ("data_mux", 2), + ("num_repeat", clog2(self._params.queue_depth) + 1)]) + + self.cfg_load_dma_ctrl_t = PackedStruct("load_dma_ctrl_t", + [("mode", 2), + ("use_valid", 1), + ("use_flush", 1), + ("data_mux", 2), + ("num_repeat", clog2(self._params.queue_depth) + 1)]) + + dma_header_struct_list = [("start_addr", 
self._params.glb_addr_width), + ("cycle_start_addr", self._params.cycle_count_width)] + dma_header_struct_list += [("dim", 1 + clog2(self._params.loop_level))] + for i in range(self._params.loop_level): + dma_header_struct_list += [(f"range_{i}", self._params.axi_data_width), + (f"stride_{i}", self._params.glb_addr_width + 1), + (f"cycle_stride_{i}", self._params.cycle_count_width)] + self.cfg_dma_header_t = PackedStruct("dma_header_t", dma_header_struct_list) + else: + self.cfg_store_dma_ctrl_t = PackedStruct("store_dma_ctrl_t", + [("mode", 2), + ("valid_mode", 2), + ("data_mux", 2), + ("num_repeat", clog2(self._params.queue_depth) + 1)]) + + self.cfg_load_dma_ctrl_t = PackedStruct("load_dma_ctrl_t", + [("mode", 2), + ("valid_mode", 2), + ("flush_mode", 1), + ("data_mux", 2), + ("num_repeat", clog2(self._params.queue_depth) + 1)]) + + load_dma_header_struct_list = [("start_addr", self._params.glb_addr_width), + ("cycle_start_addr", self._params.cycle_count_width)] + load_dma_header_struct_list += [("dim", 1 + clog2(self._params.load_dma_loop_level))] + for i in range(self._params.load_dma_loop_level): + load_dma_header_struct_list += [(f"range_{i}", self._params.axi_data_width), + (f"stride_{i}", self._params.glb_addr_width + 1), + (f"cycle_stride_{i}", self._params.cycle_count_width)] + self.cfg_load_dma_header_t = PackedStruct("load_dma_header_t", load_dma_header_struct_list) + + store_dma_header_struct_list = [("start_addr", self._params.glb_addr_width), + ("cycle_start_addr", self._params.cycle_count_width)] + store_dma_header_struct_list += [("dim", 1 + clog2(self._params.store_dma_loop_level))] + for i in range(self._params.store_dma_loop_level): + store_dma_header_struct_list += [(f"range_{i}", self._params.axi_data_width), + (f"stride_{i}", self._params.glb_addr_width + 1), + (f"cycle_stride_{i}", self._params.cycle_count_width)] + self.cfg_store_dma_header_t = PackedStruct("store_dma_header_t", store_dma_header_struct_list) + # pcfg dma header 
self.cfg_pcfg_dma_ctrl_t = PackedStruct("pcfg_dma_ctrl_t", From cfd67419e124e875ce7c1acacb950cadb67c770f Mon Sep 17 00:00:00 2001 From: steveri Date: Fri, 30 Dec 2022 07:27:08 -0800 Subject: [PATCH 55/63] another glb_header.py merge --- global_buffer/design/glb_header.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/global_buffer/design/glb_header.py b/global_buffer/design/glb_header.py index 686bb71ceb..ff8bd30aa3 100644 --- a/global_buffer/design/glb_header.py +++ b/global_buffer/design/glb_header.py @@ -37,6 +37,10 @@ def __init__(self, _params: GlobalBufferParams): (f"stride_{i}", self._params.glb_addr_width + 1), (f"cycle_stride_{i}", self._params.cycle_count_width)] self.cfg_dma_header_t = PackedStruct("dma_header_t", dma_header_struct_list) + + # All headers same for amber version + self.cfg_load_dma_header_t = PackedStruct("dma_header_t", dma_header_struct_list) + self.cfg_store_dma_header_t = PackedStruct("dma_header_t", dma_header_struct_list) else: self.cfg_store_dma_ctrl_t = PackedStruct("store_dma_ctrl_t", [("mode", 2), From 46859f0b5642c45c334c888c36a59ff3c29bdda9 Mon Sep 17 00:00:00 2001 From: steveri Date: Fri, 30 Dec 2022 14:59:34 -0800 Subject: [PATCH 56/63] merged glb_cfg.py --- global_buffer/design/glb_cfg.py | 62 +++++++++++++++++++++++---------- 1 file changed, 44 insertions(+), 18 deletions(-) diff --git a/global_buffer/design/glb_cfg.py b/global_buffer/design/glb_cfg.py index 69c5b99648..495cce2813 100644 --- a/global_buffer/design/glb_cfg.py +++ b/global_buffer/design/glb_cfg.py @@ -51,9 +51,11 @@ def __init__(self, _params: GlobalBufferParams): # pcfg broadcast self.cfg_pcfg_broadcast_mux = self.output("cfg_pcfg_broadcast_mux", self.header.cfg_pcfg_broadcast_mux_t) - # rdy/vld number of blocks - # self.cfg_ld_dma_num_blocks = self.output("cfg_ld_dma_num_blocks", self._params.axi_data_width) - self.cfg_st_dma_num_blocks = self.output("cfg_st_dma_num_blocks", self._params.axi_data_width) + if os.getenv('WHICH_SOC') == "amber": pass + 
else: + # rdy/vld number of blocks + # self.cfg_ld_dma_num_blocks = self.output("cfg_ld_dma_num_blocks", self._params.axi_data_width) + self.cfg_st_dma_num_blocks = self.output("cfg_st_dma_num_blocks", self._params.axi_data_width) self.glb_pio_wrapper = self.get_glb_pio_wrapper() self.add_child("glb_pio", self.glb_pio_wrapper) @@ -113,12 +115,20 @@ def wire_config_signals(self): self.wire(self.cfg_pcfg_network['latency'], self.glb_pio_wrapper.ports[f"l2h_pcfg_network_latency_value_r"]) - self.wire(self.cfg_st_dma_ctrl['data_mux'], - self.glb_pio_wrapper.ports[f"l2h_st_dma_ctrl_data_mux_r"]) - self.wire(self.cfg_st_dma_ctrl['mode'], self.glb_pio_wrapper.ports[f"l2h_st_dma_ctrl_mode_r"]) - self.wire(self.cfg_st_dma_ctrl['valid_mode'], self.glb_pio_wrapper.ports[f"l2h_st_dma_ctrl_valid_mode_r"]) - self.wire(self.cfg_st_dma_ctrl['num_repeat'], self.glb_pio_wrapper.ports[f"l2h_st_dma_ctrl_num_repeat_r"]) - self.wire(self.cfg_st_dma_num_blocks, self.glb_pio_wrapper.ports[f"l2h_st_dma_num_blocks_value_r"]) + if os.getenv('WHICH_SOC') == "amber": + self.wire(self.cfg_st_dma_ctrl['data_mux'], + self.glb_pio_wrapper.ports[f"l2h_st_dma_ctrl_data_mux_r"]) + self.wire(self.cfg_st_dma_ctrl['mode'], self.glb_pio_wrapper.ports[f"l2h_st_dma_ctrl_mode_r"]) + self.wire(self.cfg_st_dma_ctrl['use_valid'], + self.glb_pio_wrapper.ports[f"l2h_st_dma_ctrl_use_valid_r"]) + self.wire(self.cfg_st_dma_ctrl['num_repeat'], self.glb_pio_wrapper.ports[f"l2h_st_dma_ctrl_num_repeat_r"]) + else: + self.wire(self.cfg_st_dma_ctrl['data_mux'], + self.glb_pio_wrapper.ports[f"l2h_st_dma_ctrl_data_mux_r"]) + self.wire(self.cfg_st_dma_ctrl['mode'], self.glb_pio_wrapper.ports[f"l2h_st_dma_ctrl_mode_r"]) + self.wire(self.cfg_st_dma_ctrl['valid_mode'], self.glb_pio_wrapper.ports[f"l2h_st_dma_ctrl_valid_mode_r"]) + self.wire(self.cfg_st_dma_ctrl['num_repeat'], self.glb_pio_wrapper.ports[f"l2h_st_dma_ctrl_num_repeat_r"]) + self.wire(self.cfg_st_dma_num_blocks, 
self.glb_pio_wrapper.ports[f"l2h_st_dma_num_blocks_value_r"]) for i in range(self._params.queue_depth): if self._params.queue_depth == 1: @@ -131,7 +141,10 @@ def wire_config_signals(self): self.glb_pio_wrapper.ports[f"l2h_st_dma_header_{i}_cycle_start_addr_cycle_start_addr_r"]) self.wire(current_header['dim'], self.glb_pio_wrapper.ports[f"l2h_st_dma_header_{i}_dim_dim_r"]) - for j in range(self._params.store_dma_loop_level): + + if os.getenv('WHICH_SOC') == "amber": LL = self._params.loop_level + else: LL = self._params.store_dma_loop_level + for j in range(LL): self.wire(current_header[f"cycle_stride_{j}"], self.glb_pio_wrapper.ports[f"l2h_st_dma_header_{i}_cycle_stride_{j}_cycle_stride_r"]) self.wire(current_header[f"stride_{j}"], @@ -139,13 +152,23 @@ def wire_config_signals(self): self.wire(current_header[f"range_{j}"], self.glb_pio_wrapper.ports[f"l2h_st_dma_header_{i}_range_{j}_range_r"]) - self.wire(self.cfg_ld_dma_ctrl['data_mux'], - self.glb_pio_wrapper.ports[f"l2h_ld_dma_ctrl_data_mux_r"]) - self.wire(self.cfg_ld_dma_ctrl['mode'], self.glb_pio_wrapper.ports[f"l2h_ld_dma_ctrl_mode_r"]) - self.wire(self.cfg_ld_dma_ctrl['valid_mode'], self.glb_pio_wrapper.ports[f"l2h_ld_dma_ctrl_valid_mode_r"]) - self.wire(self.cfg_ld_dma_ctrl['flush_mode'], self.glb_pio_wrapper.ports[f"l2h_ld_dma_ctrl_flush_mode_r"]) - self.wire(self.cfg_ld_dma_ctrl['num_repeat'], self.glb_pio_wrapper.ports[f"l2h_ld_dma_ctrl_num_repeat_r"]) - # self.wire(self.cfg_ld_dma_num_blocks, self.glb_pio_wrapper.ports[f"l2h_ld_dma_num_blocks_value_r"]) + if os.getenv('WHICH_SOC') == "amber": + self.wire(self.cfg_ld_dma_ctrl['data_mux'], + self.glb_pio_wrapper.ports[f"l2h_ld_dma_ctrl_data_mux_r"]) + self.wire(self.cfg_ld_dma_ctrl['mode'], self.glb_pio_wrapper.ports[f"l2h_ld_dma_ctrl_mode_r"]) + self.wire(self.cfg_ld_dma_ctrl['use_valid'], + self.glb_pio_wrapper.ports[f"l2h_ld_dma_ctrl_use_valid_r"]) + self.wire(self.cfg_ld_dma_ctrl['use_flush'], + 
self.glb_pio_wrapper.ports[f"l2h_ld_dma_ctrl_use_flush_r"]) + self.wire(self.cfg_ld_dma_ctrl['num_repeat'], self.glb_pio_wrapper.ports[f"l2h_ld_dma_ctrl_num_repeat_r"]) + else: + self.wire(self.cfg_ld_dma_ctrl['data_mux'], + self.glb_pio_wrapper.ports[f"l2h_ld_dma_ctrl_data_mux_r"]) + self.wire(self.cfg_ld_dma_ctrl['mode'], self.glb_pio_wrapper.ports[f"l2h_ld_dma_ctrl_mode_r"]) + self.wire(self.cfg_ld_dma_ctrl['valid_mode'], self.glb_pio_wrapper.ports[f"l2h_ld_dma_ctrl_valid_mode_r"]) + self.wire(self.cfg_ld_dma_ctrl['flush_mode'], self.glb_pio_wrapper.ports[f"l2h_ld_dma_ctrl_flush_mode_r"]) + self.wire(self.cfg_ld_dma_ctrl['num_repeat'], self.glb_pio_wrapper.ports[f"l2h_ld_dma_ctrl_num_repeat_r"]) + # self.wire(self.cfg_ld_dma_num_blocks, self.glb_pio_wrapper.ports[f"l2h_ld_dma_num_blocks_value_r"]) for i in range(self._params.queue_depth): if self._params.queue_depth == 1: @@ -158,7 +181,10 @@ def wire_config_signals(self): self.glb_pio_wrapper.ports[f"l2h_ld_dma_header_{i}_cycle_start_addr_cycle_start_addr_r"]) self.wire(current_header['dim'], self.glb_pio_wrapper.ports[f"l2h_ld_dma_header_{i}_dim_dim_r"]) - for j in range(self._params.load_dma_loop_level): + + if os.getenv('WHICH_SOC') == "amber": LL = self._params.loop_level + else: LL = self._params.load_dma_loop_level + for j in range(LL): self.wire(current_header[f"cycle_stride_{j}"], self.glb_pio_wrapper.ports[f"l2h_ld_dma_header_{i}_cycle_stride_{j}_cycle_stride_r"]) self.wire(current_header[f"stride_{j}"], From b5defae3dceab6fca4ddedf8cd48920bf2d918c7 Mon Sep 17 00:00:00 2001 From: steveri Date: Fri, 30 Dec 2022 15:01:40 -0800 Subject: [PATCH 57/63] (re)merge glb_bank_sram_gen.py --- global_buffer/design/glb_bank_sram_gen.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/global_buffer/design/glb_bank_sram_gen.py b/global_buffer/design/glb_bank_sram_gen.py index 50d2660114..f761126d4e 100644 --- a/global_buffer/design/glb_bank_sram_gen.py +++ 
b/global_buffer/design/glb_bank_sram_gen.py @@ -8,12 +8,7 @@ class GlbBankSramGen(Generator): def __init__(self, addr_width, _params: GlobalBufferParams): - # FIXME this TSMC/GF fix might go away after the smoke - # clears, but for now it gets us closer to a common master - if _params.process == "TSMC": - super().__init__("glb_bank_sram_gen") - elif _params.process == "GF": - super().__init__(f"glb_bank_sram_gen_{addr_width}") + super().__init__(f"glb_bank_sram_gen_{addr_width}") self._params = _params self.addr_width = addr_width From 30d5a4a46abec7e4e5d15f089582da34de1a8436 Mon Sep 17 00:00:00 2001 From: steveri Date: Fri, 30 Dec 2022 16:28:53 -0800 Subject: [PATCH 58/63] merged glb_loop_iter.py --- global_buffer/design/glb_loop_iter.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/global_buffer/design/glb_loop_iter.py b/global_buffer/design/glb_loop_iter.py index 11339ef0e1..5d43e9aef2 100644 --- a/global_buffer/design/glb_loop_iter.py +++ b/global_buffer/design/glb_loop_iter.py @@ -1,5 +1,6 @@ from kratos import Generator, clog2, always_ff, always_comb, posedge, const from global_buffer.design.global_buffer_parameter import GlobalBufferParams +import os class GlbLoopIter(Generator): @@ -9,6 +10,8 @@ def __init__(self, _params: GlobalBufferParams, loop_level: int): super().__init__(f"glb_loop_iter_{loop_level}") self._params = _params self.loop_level = loop_level + if os.getenv('WHICH_SOC') == "amber": + self.loop_level = self._params.loop_level # INPUTS self.clk = self.clock("clk") From 57bf45366c5052b7e61757246311beb7c5135bf2 Mon Sep 17 00:00:00 2001 From: steveri Date: Wed, 4 Jan 2023 13:15:05 -0800 Subject: [PATCH 59/63] new design_amber dir for amber compatibility --- global_buffer/design_amber/glb_load_dma.py | 523 +++++++++++++++ global_buffer/design_amber/glb_store_dma.py | 454 +++++++++++++ global_buffer/design_amber/glb_tile.py | 671 ++++++++++++++++++++ 3 files changed, 1648 insertions(+) create mode 100644 
global_buffer/design_amber/glb_load_dma.py create mode 100644 global_buffer/design_amber/glb_store_dma.py create mode 100644 global_buffer/design_amber/glb_tile.py diff --git a/global_buffer/design_amber/glb_load_dma.py b/global_buffer/design_amber/glb_load_dma.py new file mode 100644 index 0000000000..7168af2d6a --- /dev/null +++ b/global_buffer/design_amber/glb_load_dma.py @@ -0,0 +1,523 @@ +from kratos import Generator, always_ff, always_comb, posedge, resize, clog2, ext, const +from global_buffer.design.glb_loop_iter import GlbLoopIter +from global_buffer.design.glb_sched_gen import GlbSchedGen +from global_buffer.design.glb_addr_gen import GlbAddrGen +from global_buffer.design.pipeline import Pipeline +from global_buffer.design.global_buffer_parameter import GlobalBufferParams +from global_buffer.design.glb_header import GlbHeader +from global_buffer.design.glb_clk_en_gen import GlbClkEnGen + + +class GlbLoadDma(Generator): + def __init__(self, _params: GlobalBufferParams): + super().__init__("glb_load_dma") + self._params = _params + self.header = GlbHeader(self._params) + assert self._params.bank_data_width == self._params.cgra_data_width * 4 + # Should hold for both onyx and amber + # assert self._params.tile2sram_rd_delay >= self._params.flush_crossbar_pipeline_depth + + self.clk = self.clock("clk") + self.reset = self.reset("reset") + self.glb_tile_id = self.input("glb_tile_id", self._params.tile_sel_addr_width) + + self.data_g2f = self.output("data_g2f", width=self._params.cgra_data_width, + size=self._params.cgra_per_glb, packed=True) + self.data_valid_g2f = self.output("data_valid_g2f", 1, size=self._params.cgra_per_glb, packed=True) + self.data_flush = self.output("data_flush", 1) + + self.rdrq_packet_dma2bank = self.output("rdrq_packet_dma2bank", self.header.rdrq_packet_t) + self.rdrq_packet_dma2ring = self.output("rdrq_packet_dma2ring", self.header.rdrq_packet_t) + self.rdrs_packet_bank2dma = self.input("rdrs_packet_bank2dma", 
self.header.rdrs_packet_t) + self.rdrs_packet_ring2dma = self.input("rdrs_packet_ring2dma", self.header.rdrs_packet_t) + + self.cfg_tile_connected_prev = self.input("cfg_tile_connected_prev", 1) + self.cfg_tile_connected_next = self.input("cfg_tile_connected_next", 1) + self.cfg_ld_dma_num_repeat = self.input("cfg_ld_dma_num_repeat", clog2(self._params.queue_depth) + 1) + self.cfg_ld_dma_ctrl_use_valid = self.input("cfg_ld_dma_ctrl_use_valid", 1) + self.cfg_ld_dma_ctrl_use_flush = self.input("cfg_ld_dma_ctrl_use_flush", 1) + self.cfg_ld_dma_ctrl_mode = self.input("cfg_ld_dma_ctrl_mode", 2) + self.cfg_data_network_latency = self.input("cfg_data_network_latency", self._params.latency_width) + # Amber cfg_dma_header_t == cfg_load_dma_header_t == cfg_store_dma_header_t, see glb_header.py + self.cfg_ld_dma_header = self.input( + "cfg_ld_dma_header", self.header.cfg_dma_header_t, size=self._params.queue_depth) + self.cfg_data_network_g2f_mux = self.input("cfg_data_network_g2f_mux", self._params.cgra_per_glb) + + self.clk_en_dma2bank = self.output("clk_en_dma2bank", 1) + self.ld_dma_start_pulse = self.input("ld_dma_start_pulse", 1) + self.ld_dma_done_interrupt = self.output("ld_dma_done_interrupt", 1) + + # local variables + self.data_flush_w = self.var("data_flush_w", 1) + self.rdrq_packet_dma2bank_w = self.var("rdrq_packet_dma2bank_w", self.header.rdrq_packet_t) + self.rdrq_packet_dma2ring_w = self.var("rdrq_packet_dma2ring_w", self.header.rdrq_packet_t) + self.rdrs_packet = self.var("rdrs_packet", self.header.rdrs_packet_t) + self.data_g2f_w = self.var("data_g2f_w", width=self._params.cgra_data_width, + size=self._params.cgra_per_glb, packed=True) + self.data_valid_g2f_w = self.var("data_valid_g2f_w", 1, size=self._params.cgra_per_glb, packed=True) + + # UNUSED by amber + # self.data_g2f_vld_w = self.var("data_g2f_vld_w", 1, size=self._params.cgra_per_glb, packed=True) + # self.ctrl_g2f_w = self.var("ctrl_g2f_w", 1, size=self._params.cgra_per_glb, packed=True) + + 
self.ld_dma_done_pulse = self.var("ld_dma_done_pulse", 1) + + # UNUSED by amber + # self.ld_dma_done_pulse_latch = self.var("ld_dma_done_pulse_latch", 1) + # self.ld_dma_done_pulse_anded = self.var("ld_dma_done_pulse_anded", 1) + + self.ld_dma_done_pulse_last = self.var("ld_dma_done_pulse_last", 1) + + # UNUSED by amber + # self.ld_dma_done_pulse_pipeline_out = self.var("ld_dma_done_pulse_pipeline_out", 1) + + self.strm_data = self.var("strm_data", self._params.cgra_data_width) + self.strm_data_muxed = self.var("strm_data_muxed", self._params.cgra_data_width) + self.strm_data_valid = self.var("strm_data_valid", 1) + self.strm_data_valid_muxed = self.var("strm_data_valid_muxed", 1) + self.strm_data_sel_w = self.var( + "strm_data_sel_w", self._params.bank_byte_offset - self._params.cgra_byte_offset) + self.strm_data_sel = self.var("strm_data_sel", self._params.bank_byte_offset - self._params.cgra_byte_offset) + + self.strm_rd_en_w = self.var("strm_rd_en_w", 1) + self.strm_rd_addr_w = self.var("strm_rd_addr_w", self._params.glb_addr_width) + self.last_strm_rd_addr_r = self.var("last_strm_rd_addr_r", self._params.glb_addr_width) + + self.ld_dma_start_pulse_next = self.var("ld_dma_start_pulse_next", 1) + self.ld_dma_start_pulse_r = self.var("ld_dma_start_pulse_r", 1) + self.is_first = self.var("is_first", 1) + + self.ld_dma_done_pulse_w = self.var("ld_dma_done_pulse_w", 1) + + self.is_cached = self.var("is_cached", 1) + self.bank_rdrq_rd_en = self.var("bank_rdrq_rd_en", 1) + self.bank_rdrq_rd_addr = self.var("bank_rdrq_rd_addr", self._params.glb_addr_width) + self.bank_rdrs_data_cache_r = self.var("bank_rdrs_data_cache_r", self._params.bank_data_width) + + self.strm_run = self.var("strm_run", 1) + self.loop_done = self.var("loop_done", 1) + self.cycle_valid = self.var("cycle_valid", 1) + self.cycle_count = self.var("cycle_count", self._params.cycle_count_width) + self.cycle_current_addr = self.var("cycle_current_addr", self._params.cycle_count_width) + 
self.data_current_addr = self.var("data_current_addr", self._params.glb_addr_width + 1) + # AMBER loop_level == load_dma_loop_level + self.loop_mux_sel = self.var("loop_mux_sel", clog2(self._params.loop_level)) + self.repeat_cnt = self.var("repeat_cnt", clog2(self._params.queue_depth) + 1) + + if self._params.queue_depth != 1: + self.queue_sel_r = self.var("queue_sel_r", max(1, clog2(self.repeat_cnt.width))) + + # Current dma header + self.current_dma_header = self.var("current_dma_header", self.header.cfg_dma_header_t) + if self._params.queue_depth == 1: + self.wire(self.cfg_ld_dma_header, self.current_dma_header) + else: + self.wire(self.cfg_ld_dma_header[self.queue_sel_r], self.current_dma_header) + + if self._params.queue_depth != 1: + self.add_always(self.queue_sel_ff) + + self.add_always(self.repeat_cnt_ff) + self.add_always(self.cycle_counter) + self.add_always(self.is_first_ff) + self.add_always(self.strm_run_ff) + self.add_strm_data_start_pulse_pipeline() + self.add_strm_rd_en_pipeline() + self.add_strm_data_sel_pipeline() + self.add_always(self.ld_dma_start_pulse_logic) + self.add_always(self.ld_dma_start_pulse_ff) + self.add_always(self.strm_data_mux) + self.add_always(self.data_g2f_logic) + self.add_always(self.data_g2f_ff) + self.add_always(self.ld_dma_done_pulse_logic) + self.add_always(self.strm_rdrq_packet_logic) + self.add_always(self.last_strm_rd_addr_ff) + self.add_always(self.rdrq_packet_logic) + self.add_always(self.rdrq_packet_ff) + self.add_always(self.bank_rdrq_packet_logic) + self.add_always(self.rdrs_packet_logic) + self.add_always(self.bank_rdrs_data_cache_ff) + self.add_always(self.strm_data_logic) + self.add_ld_dma_done_pulse_pipeline() + self.add_done_pulse_last_pipeline() + self.add_always(self.interrupt_ff) + self.add_always(self.data_flush_ff) + self.add_always(self.data_flush_logic) + self.add_dma2bank_clk_en() + + # Loop iteration shared for cycle and data + self.loop_iter = GlbLoopIter(self._params, 
loop_level=self._params.load_dma_loop_level) + self.add_child("loop_iter", + self.loop_iter, + clk=self.clk, + clk_en=const(1, 1), + reset=self.reset, + step=self.cycle_valid, + mux_sel_out=self.loop_mux_sel, + restart=self.loop_done) + self.wire(self.loop_iter.dim, self.current_dma_header[f"dim"]) + for i in range(self._params.loop_level): + self.wire(self.loop_iter.ranges[i], self.current_dma_header[f"range_{i}"]) + + # Cycle stride + self.cycle_stride_sched_gen = GlbSchedGen(self._params) + self.add_child("cycle_stride_sched_gen", + self.cycle_stride_sched_gen, + clk=self.clk, + clk_en=const(1, 1), + reset=self.reset, + restart=self.ld_dma_start_pulse_r, + cycle_count=self.cycle_count, + current_addr=self.cycle_current_addr, + finished=self.loop_done, + valid_output=self.cycle_valid) + + self.cycle_stride_addr_gen = GlbAddrGen(self._params, loop_level=self._params.load_dma_loop_level) + self.cycle_stride_addr_gen.p_addr_width.value = self._params.cycle_count_width + self.add_child("cycle_stride_addr_gen", + self.cycle_stride_addr_gen, + clk=self.clk, + clk_en=const(1, 1), + reset=self.reset, + restart=self.ld_dma_start_pulse_r, + step=self.cycle_valid, + mux_sel=self.loop_mux_sel, + addr_out=self.cycle_current_addr) + self.wire(self.cycle_stride_addr_gen.start_addr, self.current_dma_header[f"cycle_start_addr"]) + for i in range(self._params.loop_level): + self.wire(self.cycle_stride_addr_gen.strides[i], + self.current_dma_header[f"cycle_stride_{i}"]) + + # Data stride + self.data_stride_addr_gen = GlbAddrGen(self._params, loop_level=self._params.load_dma_loop_level) + self.data_stride_addr_gen.p_addr_width.value = self._params.glb_addr_width + 1 + self.add_child("data_stride_addr_gen", + self.data_stride_addr_gen, + clk=self.clk, + clk_en=const(1, 1), + reset=self.reset, + restart=self.ld_dma_start_pulse_r, + step=self.cycle_valid, + mux_sel=self.loop_mux_sel, + addr_out=self.data_current_addr) + self.wire(self.data_stride_addr_gen.start_addr, 
ext(self.current_dma_header[f"start_addr"], + self._params.glb_addr_width + 1)) + for i in range(self._params.loop_level): + self.wire(self.data_stride_addr_gen.strides[i], self.current_dma_header[f"stride_{i}"]) + + @always_ff((posedge, "clk"), (posedge, "reset")) + def queue_sel_ff(self): + if self.reset: + self.queue_sel_r = 0 + else: + if self.cfg_ld_dma_ctrl_mode == 3: + if self.ld_dma_done_pulse: + if (self.repeat_cnt + 1) < self.cfg_ld_dma_num_repeat: + self.queue_sel_r = self.queue_sel_r + 1 + else: + self.queue_sel_r = 0 + + @always_ff((posedge, "clk"), (posedge, "reset")) + def repeat_cnt_ff(self): + if self.reset: + self.repeat_cnt = 0 + else: + if self.cfg_ld_dma_ctrl_mode == 2: + if self.ld_dma_done_pulse: + if (self.repeat_cnt + 1) < self.cfg_ld_dma_num_repeat: + self.repeat_cnt += 1 + elif self.cfg_ld_dma_ctrl_mode == 3: + if self.ld_dma_done_pulse: + if (((self.repeat_cnt + 1) < self.cfg_ld_dma_num_repeat) + & ((self.repeat_cnt + 1) < self._params.queue_depth)): + self.repeat_cnt += 1 + + @always_ff((posedge, "clk"), (posedge, "reset")) + def is_first_ff(self): + if self.reset: + self.is_first = 0 + else: + if self.ld_dma_start_pulse_r: + self.is_first = 1 + elif self.bank_rdrq_rd_en: + self.is_first = 0 + + @always_ff((posedge, "clk"), (posedge, "reset")) + def strm_run_ff(self): + if self.reset: + self.strm_run = 0 + else: + if self.ld_dma_start_pulse_r: + self.strm_run = 1 + elif self.loop_done: + self.strm_run = 0 + + @always_comb + def ld_dma_start_pulse_logic(self): + if self.cfg_ld_dma_ctrl_mode == 0: + self.ld_dma_start_pulse_next = 0 + elif self.cfg_ld_dma_ctrl_mode == 1: + self.ld_dma_start_pulse_next = (~self.strm_run) & self.ld_dma_start_pulse + elif (self.cfg_ld_dma_ctrl_mode == 2) | (self.cfg_ld_dma_ctrl_mode == 3): + self.ld_dma_start_pulse_next = (((~self.strm_run) & self.ld_dma_start_pulse) + | ((self.ld_dma_done_pulse) + & ((self.repeat_cnt + 1) < self.cfg_ld_dma_num_repeat))) + else: + self.ld_dma_start_pulse_next = 0 + + 
    # NOTE(review): this span comes from a flattened patch hunk; line breaks
    # and indentation are reconstructed from syntax, code tokens are unchanged.

    # Registers the start pulse. If the register is already 1 it is forced back
    # to 0, so the registered pulse is never high on two consecutive cycles.
    @always_ff((posedge, "clk"), (posedge, "reset"))
    def ld_dma_start_pulse_ff(self):
        if self.reset:
            self.ld_dma_start_pulse_r = 0
        else:
            if self.ld_dma_start_pulse_r:
                self.ld_dma_start_pulse_r = 0
            else:
                self.ld_dma_start_pulse_r = self.ld_dma_start_pulse_next

    # Cycle counter: zeroed on reset, on the registered start pulse and on
    # `loop_done`; otherwise increments while `strm_run` is high.
    @always_ff((posedge, "clk"), (posedge, "reset"))
    def cycle_counter(self):
        if self.reset:
            self.cycle_count = 0
        else:
            if self.ld_dma_start_pulse_r:
                self.cycle_count = 0
            elif self.loop_done:
                self.cycle_count = 0
            elif self.strm_run:
                self.cycle_count = self.cycle_count + 1

    # Chooses the valid signal that accompanies the stream data:
    # `strm_data_valid` when use_valid is set, the delayed start pulse when
    # neither use_valid nor use_flush is set, and 0 otherwise.
    @always_comb
    def strm_data_mux(self):
        self.strm_data_muxed = self.strm_data
        if self.cfg_ld_dma_ctrl_use_valid:
            self.strm_data_valid_muxed = self.strm_data_valid
        elif ~self.cfg_ld_dma_ctrl_use_flush:
            self.strm_data_valid_muxed = self.strm_data_start_pulse
        else:
            self.strm_data_valid_muxed = 0

    # Per-output gating: output i carries the muxed data/valid only when bit i
    # of `cfg_data_network_g2f_mux` is 1; otherwise it is driven to 0.
    @always_comb
    def data_g2f_logic(self):
        for i in range(self._params.cgra_per_glb):
            if self.cfg_data_network_g2f_mux[i] == 1:
                self.data_g2f_w[i] = self.strm_data_muxed
                self.data_valid_g2f_w[i] = self.strm_data_valid_muxed
            else:
                self.data_g2f_w[i] = 0
                self.data_valid_g2f_w[i] = 0

    # Output register stage for `data_g2f` / `data_valid_g2f`.
    @always_ff((posedge, "clk"), (posedge, "reset"))
    def data_g2f_ff(self):
        if self.reset:
            self.data_g2f = 0
            self.data_valid_g2f = 0
        else:
            for i in range(self._params.cgra_per_glb):
                self.data_g2f[i] = self.data_g2f_w[i]
                self.data_valid_g2f[i] = self.data_valid_g2f_w[i]

    # When use_flush is set, the delayed start pulse is forwarded as the flush.
    @always_comb
    def data_flush_logic(self):
        if self.cfg_ld_dma_ctrl_use_flush:
            self.data_flush_w = self.strm_data_start_pulse
        else:
            self.data_flush_w = 0

    # Output register stage for `data_flush`.
    @always_ff((posedge, "clk"), (posedge, "reset"))
    def data_flush_ff(self):
        if self.reset:
            self.data_flush = 0
        else:
            self.data_flush = self.data_flush_w

    # Raw (undelayed) done pulse: `loop_done` while a stream is running.
    @always_comb
    def ld_dma_done_pulse_logic(self):
        self.ld_dma_done_pulse_w = self.strm_run & self.loop_done

    # Stream-side read request: enable follows `cycle_valid`; the address is
    # `data_current_addr` resized to `glb_addr_width`.
    @always_comb
    def strm_rdrq_packet_logic(self):
        self.strm_rd_en_w = self.cycle_valid
        self.strm_rd_addr_w = resize(self.data_current_addr, self._params.glb_addr_width)

    # Remembers the address of the most recent stream read request.
    @always_ff((posedge, "clk"), (posedge, "reset"))
    def last_strm_rd_addr_ff(self):
        if self.reset:
            self.last_strm_rd_addr_r = 0
        else:
            if self.strm_rd_en_w:
                self.last_strm_rd_addr_r = self.strm_rd_addr_w

    # Bank-side read request. `is_cached` compares the address bits above
    # `bank_byte_offset` with those of the previous request; a bank read is
    # issued only for the first request or when those bits differ.
    @always_comb
    def bank_rdrq_packet_logic(self):
        self.is_cached = (self.strm_rd_addr_w[self._params.glb_addr_width - 1, self._params.bank_byte_offset]
                          == self.last_strm_rd_addr_r[self._params.glb_addr_width - 1,
                                                      self._params.bank_byte_offset])
        self.bank_rdrq_rd_en = self.strm_rd_en_w & (self.is_first | (~self.is_cached))
        self.bank_rdrq_rd_addr = self.strm_rd_addr_w

    # Steers the bank read request to the ring packet when the tile is
    # connected to a neighbour (next or prev), else to the local-bank packet.
    # The unused packet is driven to 0.
    @always_comb
    def rdrq_packet_logic(self):
        if self.cfg_tile_connected_next | self.cfg_tile_connected_prev:
            self.rdrq_packet_dma2bank_w = 0
            self.rdrq_packet_dma2ring_w['rd_en'] = self.bank_rdrq_rd_en
            self.rdrq_packet_dma2ring_w['rd_addr'] = self.bank_rdrq_rd_addr
        else:
            self.rdrq_packet_dma2bank_w['rd_en'] = self.bank_rdrq_rd_en
            self.rdrq_packet_dma2bank_w['rd_addr'] = self.bank_rdrq_rd_addr
            self.rdrq_packet_dma2ring_w = 0

    # Output register stage for both read-request packets.
    @always_ff((posedge, "clk"), (posedge, "reset"))
    def rdrq_packet_ff(self):
        if self.reset:
            self.rdrq_packet_dma2bank = 0
            self.rdrq_packet_dma2ring = 0
        else:
            self.rdrq_packet_dma2bank = self.rdrq_packet_dma2bank_w
            self.rdrq_packet_dma2ring = self.rdrq_packet_dma2ring_w

    # Instantiates a GlbClkEnGen child (cnt = tile2sram_rd_delay +
    # rd_clk_en_margin) enabled by the unregistered dma2bank read enable, and
    # wires its `clk_en` to the `clk_en_dma2bank` signal.
    def add_dma2bank_clk_en(self):
        self.clk_en_gen = GlbClkEnGen(cnt=self._params.tile2sram_rd_delay + self._params.rd_clk_en_margin)
        self.dma2bank_clk_en = self.var("dma2bank_clk_en", 1)
        self.add_child("dma2bank_clk_en_gen",
                       self.clk_en_gen,
                       clk=self.clk,
                       reset=self.reset,
                       enable=self.rdrq_packet_dma2bank_w['rd_en'],
                       clk_en=self.dma2bank_clk_en
                       )
        self.wire(self.clk_en_dma2bank, self.dma2bank_clk_en)

    # Selects the read-response source with the same condition used to steer
    # the request: ring when connected to a neighbour, local bank otherwise.
    @always_comb
    def rdrs_packet_logic(self):
        if self.cfg_tile_connected_next | self.cfg_tile_connected_prev:
            self.rdrs_packet = self.rdrs_packet_ring2dma
        else:
            self.rdrs_packet = self.rdrs_packet_bank2dma

    # Latches the most recent valid read-response data word.
    @always_ff((posedge, "clk"), (posedge, "reset"))
    def bank_rdrs_data_cache_ff(self):
        if self.reset:
            self.bank_rdrs_data_cache_r = 0
        else:
            if self.rdrs_packet['rd_data_valid']:
                self.bank_rdrs_data_cache_r = self.rdrs_packet['rd_data']

    # Picks one `cgra_data_width` slice (index 0..3) of the cached bank word
    # according to `strm_data_sel`; any other value falls back to slice 0.
    @always_comb
    def strm_data_logic(self):
        if self.strm_data_sel == 0:
            self.strm_data = self.bank_rdrs_data_cache_r[self._params.cgra_data_width - 1, 0]
        elif self.strm_data_sel == 1:
            self.strm_data = self.bank_rdrs_data_cache_r[self._params.cgra_data_width * 2 - 1,
                                                         self._params.cgra_data_width * 1]
        elif self.strm_data_sel == 2:
            self.strm_data = self.bank_rdrs_data_cache_r[self._params.cgra_data_width * 3 - 1,
                                                         self._params.cgra_data_width * 2]
        elif self.strm_data_sel == 3:
            self.strm_data = self.bank_rdrs_data_cache_r[self._params.cgra_data_width * 4 - 1,
                                                         self._params.cgra_data_width * 3]
        else:
            self.strm_data = self.bank_rdrs_data_cache_r[self._params.cgra_data_width - 1, 0]

    # Delays `strm_rd_en_w` through a Pipeline child whose taps are exposed as
    # an array, then wires `strm_data_valid` to the tap at index
    # (cfg_data_network_latency + tile2sram_rd_delay).
    def add_strm_rd_en_pipeline(self):
        # Pipeline depth: 2 * num_glb_tiles + chain overhead + SRAM read delay.
        maximum_latency = (2 * self._params.num_glb_tiles
                           + self._params.chain_latency_overhead + self._params.tile2sram_rd_delay)
        latency_width = clog2(maximum_latency)
        self.strm_rd_en_d_arr = self.var(
            "strm_rd_en_d_arr", 1, size=maximum_latency, explicit_array=True)
        self.strm_rd_en_pipeline = Pipeline(width=1,
                                            depth=maximum_latency,
                                            flatten_output=True)
        self.add_child("strm_rd_en_pipeline",
                       self.strm_rd_en_pipeline,
                       clk=self.clk,
                       clk_en=const(1, 1),
                       reset=self.reset,
                       in_=self.strm_rd_en_w,
                       out_=self.strm_rd_en_d_arr)

        self.wire(self.strm_data_valid, self.strm_rd_en_d_arr[resize(
            self.cfg_data_network_latency, latency_width) + self._params.tile2sram_rd_delay])

    # Same delay structure for the slice selector: `strm_data_sel_w` is the
    # read-address bits [bank_byte_offset - 1 : cgra_byte_offset].
    def add_strm_data_sel_pipeline(self):
        maximum_latency = (2 * self._params.num_glb_tiles
                           + self._params.chain_latency_overhead + self._params.tile2sram_rd_delay)
        latency_width = clog2(maximum_latency)
        self.wire(self.strm_data_sel_w,
                  self.strm_rd_addr_w[self._params.bank_byte_offset - 1, self._params.cgra_byte_offset])
        self.strm_data_sel_arr = self.var("strm_data_sel_arr", width=self._params.bank_byte_offset
                                          - self._params.cgra_byte_offset, size=maximum_latency, explicit_array=True)
        self.strm_data_sel_pipeline = Pipeline(width=self._params.bank_byte_offset - self._params.cgra_byte_offset,
                                               depth=maximum_latency,
                                               flatten_output=True)
        self.add_child("strm_data_sel_pipeline",
                       self.strm_data_sel_pipeline,
                       clk=self.clk,
                       clk_en=const(1, 1),
                       reset=self.reset,
                       in_=self.strm_data_sel_w,
                       out_=self.strm_data_sel_arr)

        # NOTE(review): unlike the sibling pipelines this binds the attribute
        # to the indexed expression instead of calling self.wire(); confirm
        # this is intentional.
        self.strm_data_sel = self.strm_data_sel_arr[resize(
            self.cfg_data_network_latency, latency_width) + self._params.tile2sram_rd_delay]

    # Same delay structure for the registered start pulse; the selected tap
    # becomes `strm_data_start_pulse`.
    def add_strm_data_start_pulse_pipeline(self):
        maximum_latency = (2 * self._params.num_glb_tiles
                           + self._params.chain_latency_overhead + self._params.tile2sram_rd_delay)
        latency_width = clog2(maximum_latency)
        self.strm_data_start_pulse_d_arr = self.var(
            "strm_data_start_pulse_d_arr", 1, size=maximum_latency, explicit_array=True)
        self.strm_data_start_pulse_pipeline = Pipeline(width=1,
                                                       depth=maximum_latency,
                                                       flatten_output=True)
        # The child instance name says "dma" while the attribute says "data";
        # the instance name is kept exactly as in the original.
        self.add_child("strm_dma_start_pulse_pipeline",
                       self.strm_data_start_pulse_pipeline,
                       clk=self.clk,
                       clk_en=const(1, 1),
                       reset=self.reset,
                       in_=self.ld_dma_start_pulse_r,
                       out_=self.strm_data_start_pulse_d_arr)
        self.strm_data_start_pulse = self.var("strm_data_start_pulse", 1)
        self.wire(self.strm_data_start_pulse,
                  self.strm_data_start_pulse_d_arr[resize(self.cfg_data_network_latency, latency_width)
                                                   + self._params.tile2sram_rd_delay])
class GlbStoreDma(Generator):
    """Store-DMA generator ("glb_store_dma").

    Takes data/valid words from the fabric (``data_f2g`` / ``data_valid_f2g``),
    accumulates up to four ``cgra_data_width`` words into one bank-wide word
    with byte strobes, and emits write packets either to the local bank or to
    the ring, depending on the ``cfg_tile_connected_*`` inputs. It raises
    ``st_dma_done_interrupt`` after a transfer completes.

    The ``@always_ff`` / ``@always_comb`` methods are registered through
    ``add_always``; they are documented with ``#`` comments only, so that no
    extra statements are added to their bodies.

    NOTE(review): this class was recovered from a flattened patch hunk; the
    indentation of nested ``if``/``else`` chains is reconstructed from syntax.
    """

    def __init__(self, _params: GlobalBufferParams):
        """Declare ports and internal signals, then build the datapath.

        Args:
            _params: global-buffer parameter set used for all widths, sizes
                and delays in this generator.
        """
        super().__init__("glb_store_dma")
        self._params = _params
        self.header = GlbHeader(self._params)
        # The 4-way slice selection in bank_wr_packet_cache_comb relies on this.
        assert self._params.bank_data_width == self._params.cgra_data_width * 4

        self.clk = self.clock("clk")
        self.reset = self.reset("reset")
        self.clk_en_dma2bank = self.output("clk_en_dma2bank", 1)

        self.data_f2g = self.input("data_f2g", width=self._params.cgra_data_width,
                                   size=self._params.cgra_per_glb, packed=True)
        self.data_valid_f2g = self.input("data_valid_f2g", 1, size=self._params.cgra_per_glb, packed=True)

        self.wr_packet_dma2bank = self.output("wr_packet_dma2bank", self.header.wr_packet_t)
        self.wr_packet_dma2ring = self.output("wr_packet_dma2ring", self.header.wr_packet_t)

        self.cfg_tile_connected_prev = self.input("cfg_tile_connected_prev", 1)
        self.cfg_tile_connected_next = self.input("cfg_tile_connected_next", 1)
        self.cfg_st_dma_num_repeat = self.input("cfg_st_dma_num_repeat", clog2(self._params.queue_depth) + 1)
        self.cfg_st_dma_ctrl_mode = self.input("cfg_st_dma_ctrl_mode", 2)
        self.cfg_st_dma_ctrl_use_valid = self.input("cfg_st_dma_ctrl_use_valid", 1)
        self.cfg_data_network_latency = self.input("cfg_data_network_latency", self._params.latency_width)
        # Amber cfg_dma_header_t == cfg_load_dma_header_t == cfg_store_dma_header_t, see glb_header.py
        self.cfg_st_dma_header = self.input("cfg_st_dma_header", self.header.cfg_store_dma_header_t,
                                            size=self._params.queue_depth, explicit_array=True)
        self.cfg_data_network_f2g_mux = self.input("cfg_data_network_f2g_mux", self._params.cgra_per_glb)

        self.st_dma_start_pulse = self.input("st_dma_start_pulse", 1)
        self.st_dma_done_interrupt = self.output("st_dma_done_interrupt", 1)

        # localparam: one strobe bit per byte of a CGRA word, all ones.
        self.cgra_strb_width = self._params.cgra_data_width // 8
        self.cgra_strb_value = 2 ** (self._params.cgra_data_width // 8) - 1

        # local variables
        self.wr_packet_dma2bank_w = self.var("wr_packet_dma2bank_w", self.header.wr_packet_t)
        self.wr_packet_dma2ring_w = self.var("wr_packet_dma2ring_w", self.header.wr_packet_t)
        self.data_f2g_r = self.var("data_f2g_r", width=self._params.cgra_data_width,
                                   size=self._params.cgra_per_glb, packed=True)
        self.data_valid_f2g_r = self.var("data_valid_f2g_r", 1, size=self._params.cgra_per_glb, packed=True)
        self.strm_data = self.var("strm_data", width=self._params.cgra_data_width)
        self.strm_data_valid = self.var("strm_data_valid", width=1)
        self.st_dma_done_pulse = self.var("st_dma_done_pulse", 1)
        self.st_dma_done_pulse_last = self.var("st_dma_done_pulse_last", 1)
        self.strm_wr_data_w = self.var("strm_wr_data_w", width=self._params.cgra_data_width)
        self.strm_wr_addr_w = self.var("strm_wr_addr_w", width=self._params.glb_addr_width)
        self.last_strm_wr_addr_r = self.var("last_strm_wr_addr_r", width=self._params.glb_addr_width)
        self.strm_wr_en_w = self.var("strm_wr_en_w", width=1)
        self.strm_data_sel = self.var("strm_data_sel", self._params.bank_byte_offset - self._params.cgra_byte_offset)

        self.bank_addr_match = self.var("bank_addr_match", 1)
        self.bank_wr_en = self.var("bank_wr_en", 1)
        self.bank_wr_addr = self.var("bank_wr_addr", width=self._params.glb_addr_width)
        self.bank_wr_data_cache_r = self.var("bank_wr_data_cache_r", self._params.bank_data_width)
        self.bank_wr_data_cache_w = self.var("bank_wr_data_cache_w", self._params.bank_data_width)
        self.bank_wr_strb_cache_r = self.var("bank_wr_strb_cache_r", self._params.bank_strb_width)
        self.bank_wr_strb_cache_w = self.var("bank_wr_strb_cache_w", self._params.bank_strb_width)

        self.done_pulse_w = self.var("done_pulse_w", 1)
        self.st_dma_start_pulse_next = self.var("st_dma_start_pulse_next", 1)
        self.st_dma_start_pulse_r = self.var("st_dma_start_pulse_r", 1)
        self.is_first = self.var("is_first", 1)
        self.is_last = self.var("is_last", 1)
        self.strm_run = self.var("strm_run", 1)
        self.loop_done = self.var("loop_done", 1)
        self.cycle_valid = self.var("cycle_valid", 1)
        self.cycle_valid_muxed = self.var("cycle_valid_muxed", 1)
        self.cycle_count = self.var("cycle_count", self._params.cycle_count_width)
        self.cycle_current_addr = self.var("cycle_current_addr", self._params.cycle_count_width)
        self.data_current_addr = self.var("data_current_addr", self._params.glb_addr_width + 1)
        self.loop_mux_sel = self.var("loop_mux_sel", clog2(self._params.loop_level))
        self.repeat_cnt = self.var("repeat_cnt", clog2(self._params.queue_depth) + 1)

        if self._params.queue_depth != 1:
            # queue_sel_r indexes cfg_st_dma_header, which has queue_depth
            # entries, so it needs clog2(queue_depth) bits. The previous
            # clog2(repeat_cnt.width) sizing gives the same width for depths
            # 2..4 but is too narrow for deeper queues.
            self.queue_sel_r = self.var("queue_sel_r", max(1, clog2(self._params.queue_depth)))

        # Current dma header
        self.current_dma_header = self.var("current_dma_header", self.header.cfg_store_dma_header_t)
        if self._params.queue_depth == 1:
            self.wire(self.cfg_st_dma_header, self.current_dma_header)
        else:
            self.wire(self.cfg_st_dma_header[self.queue_sel_r], self.current_dma_header)

        if self._params.queue_depth != 1:
            self.add_always(self.queue_sel_ff)

        self.add_always(self.repeat_cnt_ff)
        self.add_always(self.is_first_ff)
        self.add_always(self.is_last_ff)
        self.add_always(self.strm_run_ff)
        self.add_always(self.st_dma_start_pulse_logic)
        self.add_always(self.st_dma_start_pulse_ff)
        self.add_always(self.cycle_counter)
        self.add_always(self.data_f2g_ff)
        self.add_always(self.data_f2g_logic)
        self.add_always(self.cycle_valid_comb)
        self.add_always(self.strm_wr_packet_comb)
        self.add_always(self.last_strm_wr_addr_ff)
        self.add_always(self.strm_data_sel_comb)
        self.add_always(self.bank_wr_packet_cache_comb)
        self.add_always(self.bank_wr_packet_cache_ff)
        self.add_always(self.bank_wr_packet_logic)
        self.add_always(self.wr_packet_ff)
        self.add_always(self.wr_packet_logic)
        self.add_dma2bank_clk_en()
        self.add_always(self.strm_done_pulse_logic)
        self.add_done_pulse_pipeline()
        self.add_done_pulse_last_pipeline()
        self.add_always(self.interrupt_ff)

        # Loop iteration shared for cycle and data
        # NOTE(review): the children are built with store_dma_loop_level while
        # the wiring loops (and loop_mux_sel) use _params.loop_level; this only
        # lines up if the two parameters are equal - TODO confirm.
        self.loop_iter = GlbLoopIter(self._params, loop_level=self._params.store_dma_loop_level)
        self.add_child("loop_iter",
                       self.loop_iter,
                       clk=self.clk,
                       clk_en=const(1, 1),
                       reset=self.reset,
                       step=self.cycle_valid_muxed,
                       mux_sel_out=self.loop_mux_sel,
                       restart=self.loop_done)
        self.wire(self.loop_iter.dim, self.current_dma_header["dim"])
        for i in range(self._params.loop_level):
            self.wire(self.loop_iter.ranges[i], self.current_dma_header[f"range_{i}"])

        # Cycle stride
        self.cycle_stride_sched_gen = GlbSchedGen(self._params)
        self.add_child("cycle_stride_sched_gen",
                       self.cycle_stride_sched_gen,
                       clk=self.clk,
                       clk_en=const(1, 1),
                       reset=self.reset,
                       restart=self.st_dma_start_pulse_r,
                       cycle_count=self.cycle_count,
                       current_addr=self.cycle_current_addr,
                       finished=self.loop_done,
                       valid_output=self.cycle_valid)

        self.cycle_stride_addr_gen = GlbAddrGen(self._params, loop_level=self._params.store_dma_loop_level)
        self.cycle_stride_addr_gen.p_addr_width.value = self._params.cycle_count_width
        self.add_child("cycle_stride_addr_gen",
                       self.cycle_stride_addr_gen,
                       clk=self.clk,
                       clk_en=const(1, 1),
                       reset=self.reset,
                       restart=self.st_dma_start_pulse_r,
                       step=self.cycle_valid_muxed,
                       mux_sel=self.loop_mux_sel)
        self.wire(self.cycle_stride_addr_gen.addr_out, self.cycle_current_addr)
        self.wire(self.cycle_stride_addr_gen.start_addr, self.current_dma_header["cycle_start_addr"])
        for i in range(self._params.loop_level):
            self.wire(self.cycle_stride_addr_gen.strides[i],
                      self.current_dma_header[f"cycle_stride_{i}"])

        # Data stride
        self.data_stride_addr_gen = GlbAddrGen(self._params, loop_level=self._params.store_dma_loop_level)
        self.data_stride_addr_gen.p_addr_width.value = self._params.glb_addr_width + 1
        self.add_child("data_stride_addr_gen",
                       self.data_stride_addr_gen,
                       clk=self.clk,
                       clk_en=const(1, 1),
                       reset=self.reset,
                       restart=self.st_dma_start_pulse_r,
                       step=self.cycle_valid_muxed,
                       mux_sel=self.loop_mux_sel,
                       addr_out=self.data_current_addr)
        self.wire(self.data_stride_addr_gen.start_addr, ext(self.current_dma_header["start_addr"],
                                                            self._params.glb_addr_width + 1))
        for i in range(self._params.loop_level):
            self.wire(self.data_stride_addr_gen.strides[i], self.current_dma_header[f"stride_{i}"])

    # Sequential block for `repeat_cnt`: cleared on reset and incremented on
    # `st_dma_done_pulse` in modes 2 and 3. Mode 3 additionally requires
    # `repeat_cnt + 1 < queue_depth`.
    @always_ff((posedge, "clk"), (posedge, "reset"))
    def repeat_cnt_ff(self):
        if self.reset:
            self.repeat_cnt = 0
        else:
            if self.cfg_st_dma_ctrl_mode == 2:
                if self.st_dma_done_pulse:
                    if (self.repeat_cnt + 1) < self.cfg_st_dma_num_repeat:
                        self.repeat_cnt += 1
            elif self.cfg_st_dma_ctrl_mode == 3:
                if self.st_dma_done_pulse:
                    if (((self.repeat_cnt + 1) < self.cfg_st_dma_num_repeat)
                            & ((self.repeat_cnt + 1) < self._params.queue_depth)):
                        self.repeat_cnt += 1

    # Sequential block for `queue_sel_r`: in mode 3 it advances on each done
    # pulse while more repeats remain.
    @always_ff((posedge, "clk"), (posedge, "reset"))
    def queue_sel_ff(self):
        if self.reset:
            self.queue_sel_r = 0
        else:
            if self.cfg_st_dma_ctrl_mode == 3:
                if self.st_dma_done_pulse:
                    if (self.repeat_cnt + 1) < self.cfg_st_dma_num_repeat:
                        self.queue_sel_r = self.queue_sel_r + 1
            # NOTE(review): presumably this `else` belongs to the ctrl_mode
            # check (selector forced to 0 outside mode 3) - TODO confirm.
            else:
                self.queue_sel_r = 0

    # `is_first` is set by the registered start pulse and cleared by the first
    # stream write (`strm_wr_en_w`).
    @always_ff((posedge, "clk"), (posedge, "reset"))
    def is_first_ff(self):
        if self.reset:
            self.is_first = 0
        else:
            if self.st_dma_start_pulse_r:
                self.is_first = 1
            elif self.strm_wr_en_w:
                self.is_first = 0

    # `is_last` is set by `loop_done` and cleared by the next bank write;
    # bank_wr_packet_logic uses it to flush the final cached word.
    @always_ff((posedge, "clk"), (posedge, "reset"))
    def is_last_ff(self):
        if self.reset:
            self.is_last = 0
        else:
            if self.loop_done:
                self.is_last = 1
            elif self.bank_wr_en:
                self.is_last = 0

    # `strm_run` is high from the registered start pulse until `loop_done`.
    @always_ff((posedge, "clk"), (posedge, "reset"))
    def strm_run_ff(self):
        if self.reset:
            self.strm_run = 0
        else:
            if self.st_dma_start_pulse_r:
                self.strm_run = 1
            elif self.loop_done:
                self.strm_run = 0

    # Combinational next-value for the start pulse, selected by control mode:
    #   0    -> never start
    #   1    -> external start pulse, only while no stream is running
    #   2, 3 -> as mode 1, or re-trigger on `st_dma_done_pulse` while
    #           `repeat_cnt + 1 < cfg_st_dma_num_repeat`
    @always_comb
    def st_dma_start_pulse_logic(self):
        if self.cfg_st_dma_ctrl_mode == 0:
            self.st_dma_start_pulse_next = 0
        elif self.cfg_st_dma_ctrl_mode == 1:
            self.st_dma_start_pulse_next = (~self.strm_run) & self.st_dma_start_pulse
        elif (self.cfg_st_dma_ctrl_mode == 2) | (self.cfg_st_dma_ctrl_mode == 3):
            self.st_dma_start_pulse_next = (((~self.strm_run) & self.st_dma_start_pulse)
                                            | ((self.st_dma_done_pulse)
                                               & ((self.repeat_cnt + 1) < self.cfg_st_dma_num_repeat)))
        else:
            self.st_dma_start_pulse_next = 0

    # Registers the start pulse; a set register is forced back to 0, so the
    # registered pulse is never high on two consecutive cycles.
    @always_ff((posedge, "clk"), (posedge, "reset"))
    def st_dma_start_pulse_ff(self):
        if self.reset:
            self.st_dma_start_pulse_r = 0
        else:
            if self.st_dma_start_pulse_r:
                self.st_dma_start_pulse_r = 0
            else:
                self.st_dma_start_pulse_r = self.st_dma_start_pulse_next

    # Cycle counter: zeroed on reset, start pulse and `loop_done`; otherwise
    # increments while `strm_run` is high.
    @always_ff((posedge, "clk"), (posedge, "reset"))
    def cycle_counter(self):
        if self.reset:
            self.cycle_count = 0
        else:
            if self.st_dma_start_pulse_r:
                self.cycle_count = 0
            elif self.loop_done:
                self.cycle_count = 0
            elif self.strm_run:
                self.cycle_count = self.cycle_count + 1

    # Input register stage for the fabric data/valid words.
    @always_ff((posedge, "clk"), (posedge, "reset"))
    def data_f2g_ff(self):
        if self.reset:
            self.data_f2g_r = 0
            self.data_valid_f2g_r = 0
        else:
            for i in range(self._params.cgra_per_glb):
                self.data_f2g_r[i] = self.data_f2g[i]
                self.data_valid_f2g_r[i] = self.data_valid_f2g[i]

    # Selects the registered input whose bit in `cfg_data_network_f2g_mux` is
    # 1. The loop assigns in index order, so if several bits are set the
    # highest index wins; with no bit set both outputs stay 0.
    @always_comb
    def data_f2g_logic(self):
        self.strm_data = 0
        self.strm_data_valid = 0
        for i in range(self._params.cgra_per_glb):
            if self.cfg_data_network_f2g_mux[i] == 1:
                self.strm_data = self.data_f2g_r[i]
                self.strm_data_valid = self.data_valid_f2g_r[i]
            else:
                self.strm_data = self.strm_data
                self.strm_data_valid = self.strm_data_valid

    # The step/write-enable source is the incoming valid when use_valid is set,
    # otherwise the schedule generator's `cycle_valid`.
    @always_comb
    def cycle_valid_comb(self):
        if self.cfg_st_dma_ctrl_use_valid:
            self.cycle_valid_muxed = self.strm_data_valid
        else:
            self.cycle_valid_muxed = self.cycle_valid

    # Stream-side write request: enable, address (resized to glb_addr_width)
    # and data.
    @always_comb
    def strm_wr_packet_comb(self):
        self.strm_wr_en_w = self.cycle_valid_muxed
        self.strm_wr_addr_w = resize(self.data_current_addr, self._params.glb_addr_width)
        self.strm_wr_data_w = self.strm_data

    # Remembers the address of the most recent stream write.
    @always_ff((posedge, "clk"), (posedge, "reset"))
    def last_strm_wr_addr_ff(self):
        if self.reset:
            self.last_strm_wr_addr_r = 0
        else:
            if self.strm_wr_en_w:
                self.last_strm_wr_addr_r = self.strm_wr_addr_w

    # Slice selector: write-address bits [bank_byte_offset - 1 : cgra_byte_offset].
    @always_comb
    def strm_data_sel_comb(self):
        self.strm_data_sel = self.strm_wr_addr_w[self._params.bank_byte_offset - 1, self._params.cgra_byte_offset]

    # Next-state of the bank-word cache (data + byte strobes). Statement order
    # matters: the clear for a completed bank write comes first, so a word
    # arriving in the same cycle is stored into the freshly cleared cache.
    @always_comb
    def bank_wr_packet_cache_comb(self):
        self.bank_wr_strb_cache_w = self.bank_wr_strb_cache_r
        self.bank_wr_data_cache_w = self.bank_wr_data_cache_r
        # First, if cached data is written to memory, clear it.
        if self.bank_wr_en:
            self.bank_wr_strb_cache_w = 0
            self.bank_wr_data_cache_w = 0
        # Next, save data to cache
        if self.strm_wr_en_w:
            if self.strm_data_sel == 0:
                self.bank_wr_strb_cache_w[self.cgra_strb_width - 1,
                                          0] = const(self.cgra_strb_value, self.cgra_strb_width)
                self.bank_wr_data_cache_w[self._params.cgra_data_width - 1, 0] = self.strm_wr_data_w
            elif self.strm_data_sel == 1:
                self.bank_wr_strb_cache_w[self.cgra_strb_width * 2 - 1,
                                          self.cgra_strb_width] = const(self.cgra_strb_value,
                                                                        self.cgra_strb_width)
                self.bank_wr_data_cache_w[self._params.cgra_data_width * 2 - 1,
                                          self._params.cgra_data_width] = self.strm_wr_data_w
            elif self.strm_data_sel == 2:
                self.bank_wr_strb_cache_w[self.cgra_strb_width * 3 - 1,
                                          self.cgra_strb_width * 2] = const(self.cgra_strb_value,
                                                                            self.cgra_strb_width)
                self.bank_wr_data_cache_w[self._params.cgra_data_width * 3 - 1,
                                          self._params.cgra_data_width * 2] = self.strm_wr_data_w
            elif self.strm_data_sel == 3:
                self.bank_wr_strb_cache_w[self.cgra_strb_width * 4 - 1,
                                          self.cgra_strb_width * 3] = const(self.cgra_strb_value,
                                                                            self.cgra_strb_width)
                self.bank_wr_data_cache_w[self._params.cgra_data_width * 4 - 1,
                                          self._params.cgra_data_width * 3] = self.strm_wr_data_w
            else:
                self.bank_wr_strb_cache_w = self.bank_wr_strb_cache_r
                self.bank_wr_data_cache_w = self.bank_wr_data_cache_r

    # Register stage for the bank-word cache.
    @always_ff((posedge, "clk"), (posedge, "reset"))
    def bank_wr_packet_cache_ff(self):
        if self.reset:
            self.bank_wr_strb_cache_r = 0
            self.bank_wr_data_cache_r = 0
        else:
            self.bank_wr_strb_cache_r = self.bank_wr_strb_cache_w
            self.bank_wr_data_cache_r = self.bank_wr_data_cache_w

    # A bank write is issued when a new stream write targets a different bank
    # word than the previous one (and is not the first write), or when
    # `is_last` requests the final flush. The write goes to the previous
    # address, i.e. the word currently held in the cache.
    @always_comb
    def bank_wr_packet_logic(self):
        self.bank_addr_match = (self.strm_wr_addr_w[self._params.glb_addr_width - 1, self._params.bank_byte_offset]
                                == self.last_strm_wr_addr_r[self._params.glb_addr_width - 1,
                                                            self._params.bank_byte_offset])
        self.bank_wr_en = ((self.strm_wr_en_w & (~self.bank_addr_match) & (~self.is_first)) | self.is_last)
        self.bank_wr_addr = self.last_strm_wr_addr_r

    # Steers the cached word to the ring packet when the tile is connected to a
    # neighbour (next or prev), else to the local-bank packet. The unused
    # packet is driven to 0.
    @always_comb
    def wr_packet_logic(self):
        if self.cfg_tile_connected_next | self.cfg_tile_connected_prev:
            self.wr_packet_dma2bank_w = 0
            self.wr_packet_dma2ring_w['wr_en'] = self.bank_wr_en
            self.wr_packet_dma2ring_w['wr_strb'] = self.bank_wr_strb_cache_r
            self.wr_packet_dma2ring_w['wr_data'] = self.bank_wr_data_cache_r
            self.wr_packet_dma2ring_w['wr_addr'] = self.bank_wr_addr
        else:
            self.wr_packet_dma2bank_w['wr_en'] = self.bank_wr_en
            self.wr_packet_dma2bank_w['wr_strb'] = self.bank_wr_strb_cache_r
            self.wr_packet_dma2bank_w['wr_data'] = self.bank_wr_data_cache_r
            self.wr_packet_dma2bank_w['wr_addr'] = self.bank_wr_addr
            self.wr_packet_dma2ring_w = 0

    # Output register stage for both write packets.
    @always_ff((posedge, "clk"), (posedge, "reset"))
    def wr_packet_ff(self):
        if self.reset:
            self.wr_packet_dma2bank = 0
            self.wr_packet_dma2ring = 0
        else:
            self.wr_packet_dma2bank = self.wr_packet_dma2bank_w
            self.wr_packet_dma2ring = self.wr_packet_dma2ring_w

    def add_dma2bank_clk_en(self):
        """Add the clock-enable generator for the DMA-to-bank path.

        Instantiates a GlbClkEnGen child (cnt = tile2sram_wr_delay +
        wr_clk_en_margin) enabled by the unregistered dma2bank write enable,
        and wires its ``clk_en`` to the ``clk_en_dma2bank`` output.
        """
        self.clk_en_gen = GlbClkEnGen(cnt=self._params.tile2sram_wr_delay + self._params.wr_clk_en_margin)
        self.dma2bank_clk_en = self.var("dma2bank_clk_en", 1)
        self.add_child("dma2bank_clk_en_gen",
                       self.clk_en_gen,
                       clk=self.clk,
                       reset=self.reset,
                       enable=self.wr_packet_dma2bank_w['wr_en'],
                       clk_en=self.dma2bank_clk_en
                       )
        self.wire(self.clk_en_dma2bank, self.dma2bank_clk_en)

    # Raw (undelayed) done pulse: `loop_done` while a stream is running.
    @always_comb
    def strm_done_pulse_logic(self):
        self.done_pulse_w = self.loop_done & self.strm_run

    def add_done_pulse_pipeline(self):
        """Delay the raw done pulse and expose it as ``st_dma_done_pulse``.

        The pulse passes through a Pipeline child whose taps are exposed as an
        array; the tap at index (cfg_data_network_latency + tile2sram_wr_delay)
        drives ``st_dma_done_pulse``.
        """
        # Pipeline depth: 2 * num_glb_tiles + SRAM write delay + chain overhead.
        maximum_latency = (2 * self._params.num_glb_tiles + self._params.tile2sram_wr_delay
                           + self._params.chain_latency_overhead)
        latency_width = clog2(maximum_latency)
        self.done_pulse_d_arr = self.var(
            "done_pulse_d_arr", 1, size=maximum_latency, explicit_array=True)
        self.done_pulse_pipeline = Pipeline(width=1,
                                            depth=maximum_latency,
                                            flatten_output=True)
        self.add_child("done_pulse_pipeline",
                       self.done_pulse_pipeline,
                       clk=self.clk,
                       clk_en=const(1, 1),
                       reset=self.reset,
                       in_=self.done_pulse_w,
                       out_=self.done_pulse_d_arr)

        self.wire(self.st_dma_done_pulse,
                  self.done_pulse_d_arr[(resize(self.cfg_data_network_latency, latency_width)
                                         + self._params.tile2sram_wr_delay)])

    def add_done_pulse_last_pipeline(self):
        """Delay ``st_dma_done_pulse`` by ``interrupt_cnt`` stages.

        The delayed copy, ``st_dma_done_pulse_last``, is what interrupt_ff uses
        to de-assert the interrupt.
        """
        self.interrupt_last_pipeline = Pipeline(width=1, depth=self._params.interrupt_cnt)
        self.add_child("st_dma_interrupt_pipeline",
                       self.interrupt_last_pipeline,
                       clk=self.clk,
                       clk_en=const(1, 1),
                       reset=self.reset,
                       in_=self.st_dma_done_pulse,
                       out_=self.st_dma_done_pulse_last)

    # Interrupt output: set by `st_dma_done_pulse`, cleared by the delayed
    # `st_dma_done_pulse_last`. The set has priority.
    @always_ff((posedge, "clk"), (posedge, "reset"))
    def interrupt_ff(self):
        if self.reset:
            self.st_dma_done_interrupt = 0
        else:
            if self.st_dma_done_pulse:
                self.st_dma_done_interrupt = 1
            elif self.st_dma_done_pulse_last:
                self.st_dma_done_interrupt = 0
global_buffer.design.glb_switch import GlbSwitch +from global_buffer.design.glb_tile_ifc import GlbTileInterface +from global_buffer.design.global_buffer_parameter import GlobalBufferParams +from global_buffer.design.glb_header import GlbHeader +from global_buffer.design.glb_bank import GlbBank +from global_buffer.design.clk_gate import ClkGate + + +class GlbTile(Generator): + def __init__(self, _params: GlobalBufferParams): + super().__init__("glb_tile") + self._params = _params + self.header = GlbHeader(self._params) + + self.clk = self.clock("clk") + self.clk_en_pcfg_broadcast = self.clock_en("clk_en_pcfg_broadcast") + self.clk_en_master = self.clock_en("clk_en_master") + self.clk_en_bank_master = self.clock_en("clk_en_bank_master") + self.reset = self.reset("reset") + self.glb_tile_id = self.input("glb_tile_id", self._params.tile_sel_addr_width) + + self.strm_w2e_wsti_dict = {} + for port, size in self.header.packet_ports: + name = f"strm_{port}_w2e_wsti" + self.strm_w2e_wsti_dict[port] = self.input(name, size) + + self.strm_w2e_esto_dict = {} + for port, size in self.header.packet_ports: + name = f"strm_{port}_w2e_esto" + self.strm_w2e_esto_dict[port] = self.output(name, size) + + self.strm_e2w_esti_dict = {} + for port, size in self.header.packet_ports: + name = f"strm_{port}_e2w_esti" + self.strm_e2w_esti_dict[port] = self.input(name, size) + + self.strm_e2w_wsto_dict = {} + for port, size in self.header.packet_ports: + name = f"strm_{port}_e2w_wsto" + self.strm_e2w_wsto_dict[port] = self.output(name, size) + + self.pcfg_w2e_wsti_dict = {} + for port, size in self.header.rd_packet_ports: + name = f"pcfg_{port}_w2e_wsti" + self.pcfg_w2e_wsti_dict[port] = self.input(name, size) + + self.pcfg_w2e_esto_dict = {} + for port, size in self.header.rd_packet_ports: + name = f"pcfg_{port}_w2e_esto" + self.pcfg_w2e_esto_dict[port] = self.output(name, size) + + self.pcfg_e2w_esti_dict = {} + for port, size in self.header.rd_packet_ports: + name = f"pcfg_{port}_e2w_esti" 
+ self.pcfg_e2w_esti_dict[port] = self.input(name, size) + + self.pcfg_e2w_wsto_dict = {} + for port, size in self.header.rd_packet_ports: + name = f"pcfg_{port}_e2w_wsto" + self.pcfg_e2w_wsto_dict[port] = self.output(name, size) + + # Processor AXI interface + self.if_proc = GlbTileInterface(addr_width=self._params.glb_addr_width, + data_width=self._params.bank_data_width, is_clk_en=True, is_strb=True) + self.if_proc_est_m = self.interface(self.if_proc, "if_proc_est_m") + self.if_proc_wst_s = self.interface(self.if_proc, "if_proc_wst_s") + # Connect m2s ports + for m2s_port in self.if_proc.m_to_s: + port = self.output(f"if_proc_est_m_{m2s_port}", self.if_proc_est_m[m2s_port].width) + self.wire(port, self.if_proc_est_m[m2s_port]) + port = self.input(f"if_proc_wst_s_{m2s_port}", self.if_proc_wst_s[m2s_port].width) + self.wire(port, self.if_proc_wst_s[m2s_port]) + # Connect s2m ports + for s2m_port in self.if_proc.s_to_m: + port = self.input(f"if_proc_est_m_{s2m_port}", self.if_proc_est_m[s2m_port].width) + self.wire(port, self.if_proc_est_m[s2m_port]) + port = self.output(f"if_proc_wst_s_{s2m_port}", self.if_proc_wst_s[s2m_port].width) + self.wire(port, self.if_proc_wst_s[s2m_port]) + + # Configuration interface + self.if_cfg = GlbTileInterface(addr_width=self._params.axi_addr_width, + data_width=self._params.axi_data_width, is_clk_en=True, is_strb=False) + self.if_cfg_est_m = self.interface(self.if_cfg, "if_cfg_est_m") + self.if_cfg_wst_s = self.interface(self.if_cfg, "if_cfg_wst_s") + # Connect m2s ports + for m2s_port in self.if_cfg.m_to_s: + port = self.output(f"if_cfg_est_m_{m2s_port}", self.if_cfg_est_m[m2s_port].width) + self.wire(port, self.if_cfg_est_m[m2s_port]) + port = self.input(f"if_cfg_wst_s_{m2s_port}", self.if_cfg_wst_s[m2s_port].width) + self.wire(port, self.if_cfg_wst_s[m2s_port]) + # Connect s2m ports + for s2m_port in self.if_cfg.s_to_m: + port = self.input(f"if_cfg_est_m_{s2m_port}", self.if_cfg_est_m[s2m_port].width) + self.wire(port, 
self.if_cfg_est_m[s2m_port]) + port = self.output(f"if_cfg_wst_s_{s2m_port}", self.if_cfg_wst_s[s2m_port].width) + self.wire(port, self.if_cfg_wst_s[s2m_port]) + + self.cfg_tile_connected_wsti = self.input("cfg_tile_connected_wsti", 1) + self.cfg_tile_connected_esto = self.output("cfg_tile_connected_esto", 1) + self.cfg_pcfg_tile_connected_wsti = self.input("cfg_pcfg_tile_connected_wsti", 1) + self.cfg_pcfg_tile_connected_esto = self.output("cfg_pcfg_tile_connected_esto", 1) + + self.cgra_cfg_jtag_wr_en_wsti = self.input("cgra_cfg_jtag_wr_en_wsti", 1) + self.cgra_cfg_jtag_rd_en_wsti = self.input("cgra_cfg_jtag_rd_en_wsti", 1) + self.cgra_cfg_jtag_addr_wsti = self.input("cgra_cfg_jtag_addr_wsti", self._params.cgra_cfg_addr_width) + self.cgra_cfg_jtag_data_wsti = self.input("cgra_cfg_jtag_data_wsti", self._params.cgra_cfg_data_width) + + self.cgra_cfg_jtag_wr_en_esto = self.output("cgra_cfg_jtag_wr_en_esto", 1) + self.cgra_cfg_jtag_rd_en_esto = self.output("cgra_cfg_jtag_rd_en_esto", 1) + self.cgra_cfg_jtag_addr_esto = self.output("cgra_cfg_jtag_addr_esto", self._params.cgra_cfg_addr_width) + self.cgra_cfg_jtag_data_esto = self.output("cgra_cfg_jtag_data_esto", self._params.cgra_cfg_data_width) + + self.cgra_cfg_jtag_rd_en_bypass_wsti = self.input("cgra_cfg_jtag_rd_en_bypass_wsti", 1) + self.cgra_cfg_jtag_addr_bypass_wsti = self.input( + "cgra_cfg_jtag_addr_bypass_wsti", self._params.cgra_cfg_addr_width) + self.cgra_cfg_jtag_rd_en_bypass_esto = self.output("cgra_cfg_jtag_rd_en_bypass_esto", 1) + self.cgra_cfg_jtag_addr_bypass_esto = self.output( + "cgra_cfg_jtag_addr_bypass_esto", self._params.cgra_cfg_addr_width) + + self.cgra_cfg_pcfg_wr_en_w2e_wsti = self.input("cgra_cfg_pcfg_wr_en_w2e_wsti", 1) + self.cgra_cfg_pcfg_rd_en_w2e_wsti = self.input("cgra_cfg_pcfg_rd_en_w2e_wsti", 1) + self.cgra_cfg_pcfg_addr_w2e_wsti = self.input("cgra_cfg_pcfg_addr_w2e_wsti", self._params.cgra_cfg_addr_width) + self.cgra_cfg_pcfg_data_w2e_wsti = 
self.input("cgra_cfg_pcfg_data_w2e_wsti", self._params.cgra_cfg_data_width) + self.cgra_cfg_pcfg_wr_en_w2e_esto = self.output("cgra_cfg_pcfg_wr_en_w2e_esto", 1) + self.cgra_cfg_pcfg_rd_en_w2e_esto = self.output("cgra_cfg_pcfg_rd_en_w2e_esto", 1) + self.cgra_cfg_pcfg_addr_w2e_esto = self.output("cgra_cfg_pcfg_addr_w2e_esto", self._params.cgra_cfg_addr_width) + self.cgra_cfg_pcfg_data_w2e_esto = self.output("cgra_cfg_pcfg_data_w2e_esto", self._params.cgra_cfg_data_width) + + self.cgra_cfg_pcfg_wr_en_e2w_esti = self.input("cgra_cfg_pcfg_wr_en_e2w_esti", 1) + self.cgra_cfg_pcfg_rd_en_e2w_esti = self.input("cgra_cfg_pcfg_rd_en_e2w_esti", 1) + self.cgra_cfg_pcfg_addr_e2w_esti = self.input("cgra_cfg_pcfg_addr_e2w_esti", self._params.cgra_cfg_addr_width) + self.cgra_cfg_pcfg_data_e2w_esti = self.input("cgra_cfg_pcfg_data_e2w_esti", self._params.cgra_cfg_data_width) + self.cgra_cfg_pcfg_wr_en_e2w_wsto = self.output("cgra_cfg_pcfg_wr_en_e2w_wsto", 1) + self.cgra_cfg_pcfg_rd_en_e2w_wsto = self.output("cgra_cfg_pcfg_rd_en_e2w_wsto", 1) + self.cgra_cfg_pcfg_addr_e2w_wsto = self.output("cgra_cfg_pcfg_addr_e2w_wsto", self._params.cgra_cfg_addr_width) + self.cgra_cfg_pcfg_data_e2w_wsto = self.output("cgra_cfg_pcfg_data_e2w_wsto", self._params.cgra_cfg_data_width) + + self.strm_data_f2g = self.input("strm_data_f2g", self._params.cgra_data_width, + size=self._params.cgra_per_glb, packed=True) + self.strm_data_valid_f2g = self.input("strm_data_valid_f2g", 1, size=self._params.cgra_per_glb, packed=True) + self.strm_data_g2f = self.output("strm_data_g2f", self._params.cgra_data_width, + size=self._params.cgra_per_glb, packed=True) + self.strm_data_valid_g2f = self.output( + "strm_data_valid_g2f", 1, size=self._params.cgra_per_glb, packed=True) + self.data_flush = self.output("data_flush", 1) + + self.cgra_cfg_g2f_cfg_wr_en = self.output( + "cgra_cfg_g2f_cfg_wr_en", 1, size=self._params.cgra_per_glb, packed=True) + self.cgra_cfg_g2f_cfg_rd_en = self.output( + "cgra_cfg_g2f_cfg_rd_en", 
1, size=self._params.cgra_per_glb, packed=True) + self.cgra_cfg_g2f_cfg_addr = self.output( + "cgra_cfg_g2f_cfg_addr", self._params.cgra_cfg_addr_width, size=self._params.cgra_per_glb, packed=True) + self.cgra_cfg_g2f_cfg_data = self.output( + "cgra_cfg_g2f_cfg_data", self._params.cgra_cfg_data_width, size=self._params.cgra_per_glb, packed=True) + + self.strm_g2f_start_pulse = self.input("strm_g2f_start_pulse", 1) + self.strm_f2g_start_pulse = self.input("strm_f2g_start_pulse", 1) + self.pcfg_start_pulse = self.input("pcfg_start_pulse", 1) + self.strm_f2g_interrupt_pulse = self.output("strm_f2g_interrupt_pulse", 1) + self.strm_g2f_interrupt_pulse = self.output("strm_g2f_interrupt_pulse", 1) + self.pcfg_g2f_interrupt_pulse = self.output("pcfg_g2f_interrupt_pulse", 1) + + # Struct + self.struct_wiring() + + # Local variables + # configuration + self.cfg_tile_connected_prev = self.var("cfg_tile_connected_prev", 1) + self.cfg_tile_connected_next = self.var("cfg_tile_connected_next", 1) + self.cfg_pcfg_tile_connected_prev = self.var("cfg_pcfg_tile_connected_prev", 1) + self.cfg_pcfg_tile_connected_next = self.var("cfg_pcfg_tile_connected_next", 1) + + # st dma + self.cfg_st_dma_ctrl = self.var("cfg_st_dma_ctrl", self.header.cfg_store_dma_ctrl_t) + self.cfg_st_dma_header = self.var("cfg_st_dma_header", self.header.cfg_dma_header_t, + size=self._params.queue_depth) + # ld dma + self.cfg_ld_dma_ctrl = self.var("cfg_ld_dma_ctrl", self.header.cfg_load_dma_ctrl_t) + self.cfg_ld_dma_header = self.var("cfg_ld_dma_header", self.header.cfg_dma_header_t, + size=self._params.queue_depth) + # pcfg dma + self.cfg_pcfg_dma_ctrl = self.var("cfg_pcfg_dma_ctrl", self.header.cfg_pcfg_dma_ctrl_t) + self.cfg_pcfg_dma_header = self.var("cfg_pcfg_dma_header", self.header.cfg_pcfg_dma_header_t) + + # pcfg broadcast + self.cfg_pcfg_broadcast_mux = self.var("cfg_pcfg_broadcast_mux", self.header.cfg_pcfg_broadcast_mux_t) + + # Clock gating - cfg + self.gclk_cfg = self.var("gclk_cfg", 1) + 
self.clk_en_cfg = self.var("clk_en_cfg", 1) + self.wire(self.clk_en_cfg, self.if_cfg_wst_s['wr_clk_en'] | self.if_cfg_wst_s['rd_clk_en']) + self.add_child("glb_clk_gate_cfg", + ClkGate(_params=self._params), + clk=self.clk, + enable=self.clk_en_cfg | self.clk_en_master, + gclk=self.gclk_cfg) + + # Clock gating - pcfg broadcast + self.gclk_pcfg_broadcast = self.var("gclk_pcfg_broadcast", 1) + self.add_child("glb_clk_gate_pcfg_broadcast", + ClkGate(_params=self._params), + clk=self.clk, + enable=self.clk_en_pcfg_broadcast | self.clk_en_master, + gclk=self.gclk_pcfg_broadcast) + + # Clock gating - ld_dma + self.clk_en_ld_dma = self.var("clk_en_ld_dma", 1) + self.gclk_ld_dma = self.var("gclk_ld_dma", 1) + self.wire(self.clk_en_ld_dma, self.cfg_ld_dma_ctrl['mode'] != 0) + self.clk_en_lddma2bank = self.var("clk_en_lddma2bank", 1) + self.add_child("glb_clk_gate_ld_dma", + ClkGate(_params=self._params), + clk=self.clk, + enable=self.clk_en_ld_dma | self.clk_en_master, + gclk=self.gclk_ld_dma) + + # Clock gating - st_dma + self.clk_en_st_dma = self.var("clk_en_st_dma", 1) + self.gclk_st_dma = self.var("gclk_st_dma", 1) + self.wire(self.clk_en_st_dma, self.cfg_st_dma_ctrl['mode'] != 0) + self.clk_en_stdma2bank = self.var("clk_en_stdma2bank", 1) + self.add_child("glb_clk_gate_st_dma", + ClkGate(_params=self._params), + clk=self.clk, + enable=self.clk_en_st_dma | self.clk_en_master, + gclk=self.gclk_st_dma) + + # Clock gating - proc switch + self.clk_en_proc_switch = self.var("clk_en_proc_switch", 1) + self.gclk_proc_switch = self.var("gclk_proc_switch", 1) + self.wire(self.clk_en_proc_switch, self.if_proc_wst_s['wr_clk_en'] | self.if_proc_wst_s['rd_clk_en']) + self.clk_en_procsw2bank = self.var("clk_en_procsw2bank", 1) + self.add_child("glb_clk_gate_proc_switch", + ClkGate(_params=self._params), + clk=self.clk, + enable=self.clk_en_proc_switch | self.clk_en_master, + gclk=self.gclk_proc_switch) + + # Clock gating - pcfg_dma + self.clk_en_pcfg_dma = self.var("clk_en_pcfg_dma", 
1) + self.gclk_pcfg_dma = self.var("gclk_pcfg_dma", 1) + self.wire(self.clk_en_pcfg_dma, self.cfg_pcfg_dma_ctrl['mode'] != 0) + self.clk_en_pcfgdma2bank = self.var("clk_en_pcfgdma2bank", 1) + self.add_child("glb_clk_gate_pcfg_dma", + ClkGate(_params=self._params), + clk=self.clk, + enable=self.clk_en_pcfg_dma | self.clk_en_master, + gclk=self.gclk_pcfg_dma) + + # Clock gating - strm switch + self.clk_en_strm_switch = self.var("clk_en_strm_switch", 1) + self.gclk_strm_switch = self.var("gclk_strm_switch", 1) + self.wire(self.clk_en_strm_switch, self.cfg_tile_connected_next | self.cfg_tile_connected_prev) + self.clk_en_ring2bank = self.var("clk_en_ring2bank", 1) + self.add_child("glb_clk_gate_strm_switch", + ClkGate(_params=self._params), + clk=self.clk, + enable=self.clk_en_strm_switch | self.clk_en_master, + gclk=self.gclk_strm_switch) + + # Clock gating - pcfg switch + self.clk_en_pcfg_switch = self.var("clk_en_pcfg_switch", 1) + self.gclk_pcfg_switch = self.var("gclk_pcfg_switch", 1) + self.wire(self.clk_en_pcfg_switch, self.cfg_pcfg_tile_connected_next | self.cfg_pcfg_tile_connected_prev) + self.clk_en_pcfgring2bank = self.var("clk_en_pcfgring2bank", 1) + self.add_child("glb_clk_gate_pcfg_switch", + ClkGate(_params=self._params), + clk=self.clk, + enable=self.clk_en_pcfg_switch | self.clk_en_master, + gclk=self.gclk_pcfg_switch) + + # Clock gating - bank + self.clk_en_bank = self.var("clk_en_bank", 1) + self.gclk_bank = self.var("gclk_bank", 1) + self.wire(self.clk_en_bank, self.clk_en_lddma2bank | self.clk_en_stdma2bank | self.clk_en_pcfgdma2bank + | self.clk_en_ring2bank | self.clk_en_pcfgring2bank | self.clk_en_procsw2bank) + self.add_child("glb_clk_gate_bank", + ClkGate(_params=self._params), + clk=self.clk, + enable=self.clk_en_bank | self.clk_en_master | self.clk_en_bank_master, + gclk=self.gclk_bank) + + # module instantiation + self.glb_cfg = GlbCfg(_params=self._params) + self.add_child("glb_cfg", + self.glb_cfg, + mclk=self.clk, + 
gclk=clock(self.gclk_cfg), + reset=self.reset, + glb_tile_id=self.glb_tile_id) + self.wire(self.cfg_tile_connected_next, self.glb_cfg.cfg_data_network['tile_connected']) + self.wire(self.cfg_tile_connected_prev, self.cfg_tile_connected_wsti) + self.wire(self.cfg_tile_connected_next, self.cfg_tile_connected_esto) + self.wire(self.cfg_pcfg_tile_connected_next, self.glb_cfg.cfg_pcfg_network['tile_connected']) + self.wire(self.cfg_pcfg_tile_connected_prev, self.cfg_pcfg_tile_connected_wsti) + self.wire(self.cfg_pcfg_tile_connected_next, self.cfg_pcfg_tile_connected_esto) + self.wire(self.glb_cfg.if_cfg_wst_s, self.if_cfg_wst_s) + self.wire(self.glb_cfg.if_cfg_est_m, self.if_cfg_est_m) + self.wire(self.cfg_st_dma_ctrl, self.glb_cfg.cfg_st_dma_ctrl) + self.wire(self.cfg_st_dma_header, self.glb_cfg.cfg_st_dma_header) + self.wire(self.cfg_ld_dma_ctrl, self.glb_cfg.cfg_ld_dma_ctrl) + self.wire(self.cfg_ld_dma_header, self.glb_cfg.cfg_ld_dma_header) + self.wire(self.cfg_pcfg_dma_ctrl, self.glb_cfg.cfg_pcfg_dma_ctrl) + self.wire(self.cfg_pcfg_dma_header, self.glb_cfg.cfg_pcfg_dma_header) + self.wire(self.cfg_pcfg_broadcast_mux, self.glb_cfg.cfg_pcfg_broadcast_mux) + + self.glb_pcfg_broadcast = GlbPcfgBroadcast(_params=self._params) + self.add_child("glb_pcfg_broadcast", + self.glb_pcfg_broadcast, + clk=clock(self.gclk_pcfg_broadcast), + reset=self.reset, + cgra_cfg_dma2mux=self.cgra_cfg_pcfgdma2mux, + cfg_pcfg_broadcast_mux=self.cfg_pcfg_broadcast_mux) + + self.add_child("glb_store_dma", + GlbStoreDma(_params=self._params), + clk=clock(self.gclk_st_dma), + reset=self.reset, + clk_en_dma2bank=self.clk_en_stdma2bank, + data_f2g=self.strm_data_f2g, + data_valid_f2g=self.strm_data_valid_f2g, + wr_packet_dma2bank=self.wr_packet_dma2bank, + wr_packet_dma2ring=self.wr_packet_dma2ring, + # TODO: How to make this automatic + cfg_tile_connected_prev=self.cfg_tile_connected_prev, + cfg_tile_connected_next=self.cfg_tile_connected_next, + 
cfg_st_dma_num_repeat=self.cfg_st_dma_ctrl['num_repeat'], + cfg_st_dma_ctrl_use_valid=self.cfg_st_dma_ctrl['use_valid'], + cfg_st_dma_ctrl_mode=self.cfg_st_dma_ctrl['mode'], + cfg_data_network_latency=self.glb_cfg.cfg_data_network['latency'], + cfg_st_dma_header=self.cfg_st_dma_header, + st_dma_start_pulse=self.strm_f2g_start_pulse, + st_dma_done_interrupt=self.strm_f2g_interrupt_pulse, + cfg_data_network_f2g_mux=self.cfg_st_dma_ctrl['data_mux']) + + self.add_child("glb_load_dma", + GlbLoadDma(_params=self._params), + clk=clock(self.gclk_ld_dma), + reset=self.reset, + clk_en_dma2bank=self.clk_en_lddma2bank, + glb_tile_id=self.glb_tile_id, + data_g2f=self.strm_data_g2f, + data_valid_g2f=self.strm_data_valid_g2f, + data_flush=self.data_flush, + rdrq_packet_dma2bank=self.rdrq_packet_dma2bank, + rdrq_packet_dma2ring=self.rdrq_packet_dma2ring, + rdrs_packet_bank2dma=self.rdrs_packet_bank2dma, + rdrs_packet_ring2dma=self.rdrs_packet_ring2dma, + # TODO: How to make this automatic + cfg_tile_connected_prev=self.cfg_tile_connected_prev, + cfg_tile_connected_next=self.cfg_tile_connected_next, + cfg_ld_dma_num_repeat=self.cfg_ld_dma_ctrl['num_repeat'], + cfg_ld_dma_ctrl_use_valid=self.cfg_ld_dma_ctrl['use_valid'], + cfg_ld_dma_ctrl_use_flush=self.cfg_ld_dma_ctrl['use_flush'], + cfg_ld_dma_ctrl_mode=self.cfg_ld_dma_ctrl['mode'], + cfg_data_network_latency=self.glb_cfg.cfg_data_network['latency'], + cfg_ld_dma_header=self.cfg_ld_dma_header, + cfg_data_network_g2f_mux=self.cfg_ld_dma_ctrl['data_mux'], + ld_dma_start_pulse=self.strm_g2f_start_pulse, + ld_dma_done_interrupt=self.strm_g2f_interrupt_pulse) + + self.add_child("glb_pcfg_dma", + GlbPcfgDma(_params=self._params), + clk=clock(self.gclk_pcfg_dma), + reset=self.reset, + clk_en_dma2bank=self.clk_en_pcfgdma2bank, + glb_tile_id=self.glb_tile_id, + cgra_cfg_pcfg=self.cgra_cfg_pcfgdma2mux, + rdrq_packet_dma2bank=self.rdrq_packet_pcfgdma2bank, + rdrq_packet_dma2ring=self.rdrq_packet_pcfgdma2ring, + 
rdrs_packet_bank2dma=self.rdrs_packet_bank2pcfgdma, + rdrs_packet_ring2dma=self.rdrs_packet_pcfgring2dma, + # TODO: How to make this automatic + cfg_pcfg_tile_connected_prev=self.cfg_pcfg_tile_connected_prev, + cfg_pcfg_tile_connected_next=self.cfg_pcfg_tile_connected_next, + cfg_pcfg_dma_ctrl_mode=self.cfg_pcfg_dma_ctrl['mode'], + cfg_pcfg_dma_ctrl_relocation_value=self.cfg_pcfg_dma_ctrl['relocation_value'], + cfg_pcfg_dma_ctrl_relocation_is_msb=self.cfg_pcfg_dma_ctrl['relocation_is_msb'], + cfg_pcfg_network_latency=self.glb_cfg.cfg_pcfg_network['latency'], + cfg_pcfg_dma_header=self.cfg_pcfg_dma_header, + pcfg_dma_start_pulse=self.pcfg_start_pulse, + pcfg_dma_done_interrupt=self.pcfg_g2f_interrupt_pulse) + + self.glb_bank_mux = GlbBankMux(_params=self._params) + self.add_child("glb_bank_mux", + self.glb_bank_mux, + clk=clock(self.gclk_bank), + reset=self.reset, + glb_tile_id=self.glb_tile_id, + wr_packet_procsw2bank=self.wr_packet_procsw2bank, + wr_packet_ring2bank=self.wr_packet_ring2bank, + wr_packet_dma2bank=self.wr_packet_dma2bank, + wr_packet_sw2bankarr=self.wr_packet_sw2bankarr, + + rdrq_packet_procsw2bank=self.rdrq_packet_procsw2bank, + rdrq_packet_ring2bank=self.rdrq_packet_ring2bank, + rdrq_packet_dma2bank=self.rdrq_packet_dma2bank, + rdrq_packet_pcfgring2bank=self.rdrq_packet_pcfgring2bank, + rdrq_packet_pcfgdma2bank=self.rdrq_packet_pcfgdma2bank, + rdrq_packet_sw2bankarr=self.rdrq_packet_sw2bankarr, + + rdrs_packet_bankarr2sw=self.rdrs_packet_bankarr2sw, + rdrs_packet_bank2procsw=self.rdrs_packet_bank2procsw, + rdrs_packet_bank2dma=self.rdrs_packet_bank2dma, + rdrs_packet_bank2pcfgdma=self.rdrs_packet_bank2pcfgdma, + rdrs_packet_bank2ring=self.rdrs_packet_bank2ring, + rdrs_packet_bank2pcfgring=self.rdrs_packet_bank2pcfgring, + + # cfg + cfg_tile_connected_prev=self.cfg_tile_connected_prev, + cfg_tile_connected_next=self.cfg_tile_connected_next, + cfg_pcfg_tile_connected_prev=self.cfg_pcfg_tile_connected_prev, + 
cfg_pcfg_tile_connected_next=self.cfg_pcfg_tile_connected_next) + + self.glb_proc_switch = GlbSwitch(self._params, ifc=self.if_proc) + self.add_child("glb_proc_switch", + self.glb_proc_switch, + mclk=self.clk, + gclk=clock(self.gclk_proc_switch), + reset=self.reset, + glb_tile_id=self.glb_tile_id, + if_est_m=self.if_proc_est_m, + if_wst_s=self.if_proc_wst_s, + clk_en_sw2bank=self.clk_en_procsw2bank, + wr_packet=self.wr_packet_procsw2bank, + rdrq_packet=self.rdrq_packet_procsw2bank, + rdrs_packet=self.rdrs_packet_bank2procsw) + + self.add_child("glb_strm_ring_switch", + GlbRingSwitch(_params=self._params, wr_channel=True, rd_channel=True), + clk=clock(self.gclk_strm_switch), + reset=self.reset, + glb_tile_id=self.glb_tile_id, + clk_en_ring2bank=self.clk_en_ring2bank, + wr_packet_w2e_wsti=self.strm_wr_packet_w2e_wsti, + wr_packet_e2w_wsto=self.strm_wr_packet_e2w_wsto, + wr_packet_e2w_esti=self.strm_wr_packet_e2w_esti, + wr_packet_w2e_esto=self.strm_wr_packet_w2e_esto, + rdrq_packet_w2e_wsti=self.strm_rdrq_packet_w2e_wsti, + rdrq_packet_e2w_wsto=self.strm_rdrq_packet_e2w_wsto, + rdrq_packet_e2w_esti=self.strm_rdrq_packet_e2w_esti, + rdrq_packet_w2e_esto=self.strm_rdrq_packet_w2e_esto, + rdrs_packet_w2e_wsti=self.strm_rdrs_packet_w2e_wsti, + rdrs_packet_e2w_wsto=self.strm_rdrs_packet_e2w_wsto, + rdrs_packet_e2w_esti=self.strm_rdrs_packet_e2w_esti, + rdrs_packet_w2e_esto=self.strm_rdrs_packet_w2e_esto, + wr_packet_ring2bank=self.wr_packet_ring2bank, + wr_packet_dma2ring=self.wr_packet_dma2ring, + rdrq_packet_ring2bank=self.rdrq_packet_ring2bank, + rdrq_packet_dma2ring=self.rdrq_packet_dma2ring, + rdrs_packet_ring2dma=self.rdrs_packet_ring2dma, + rdrs_packet_bank2ring=self.rdrs_packet_bank2ring, + cfg_ld_dma_on=(self.cfg_ld_dma_ctrl['mode'] != 0), + cfg_tile_connected_prev=self.cfg_tile_connected_prev, + cfg_tile_connected_next=self.cfg_tile_connected_next) + + self.add_child("glb_pcfg_ring_switch", + GlbRingSwitch(_params=self._params, wr_channel=False, 
rd_channel=True), + clk=clock(self.gclk_pcfg_switch), + clk_en_ring2bank=self.clk_en_pcfgring2bank, + reset=self.reset, + glb_tile_id=self.glb_tile_id, + rdrq_packet_w2e_wsti=self.pcfg_rdrq_packet_w2e_wsti, + rdrq_packet_e2w_wsto=self.pcfg_rdrq_packet_e2w_wsto, + rdrq_packet_e2w_esti=self.pcfg_rdrq_packet_e2w_esti, + rdrq_packet_w2e_esto=self.pcfg_rdrq_packet_w2e_esto, + rdrs_packet_w2e_wsti=self.pcfg_rdrs_packet_w2e_wsti, + rdrs_packet_e2w_wsto=self.pcfg_rdrs_packet_e2w_wsto, + rdrs_packet_e2w_esti=self.pcfg_rdrs_packet_e2w_esti, + rdrs_packet_w2e_esto=self.pcfg_rdrs_packet_w2e_esto, + rdrq_packet_dma2ring=self.rdrq_packet_pcfgdma2ring, + rdrq_packet_ring2bank=self.rdrq_packet_pcfgring2bank, + rdrs_packet_ring2dma=self.rdrs_packet_pcfgring2dma, + rdrs_packet_bank2ring=self.rdrs_packet_bank2pcfgring, + cfg_ld_dma_on=(self.cfg_pcfg_dma_ctrl['mode'] != 0), + cfg_tile_connected_prev=self.cfg_pcfg_tile_connected_prev, + cfg_tile_connected_next=self.cfg_pcfg_tile_connected_next) + + self.glb_bank_arr = [] + for i in range(self._params.banks_per_tile): + glb_bank = GlbBank(self._params) + self.add_child(f"glb_bank_{i}", + glb_bank, + clk=clock(self.gclk_bank), + reset=self.reset, + wr_packet=self.wr_packet_sw2bankarr[i], + rdrq_packet=self.rdrq_packet_sw2bankarr[i], + rdrs_packet=self.rdrs_packet_bankarr2sw[i]) + self.glb_bank_arr.append(glb_bank) + + if self._params.is_sram_stub: + self.readmemh_block = RawStringStmt(["initial begin", + "\tstring b0_file_name;", + "\tstring b1_file_name;", + "\tstring load_arg;", + "\t$sformat(b0_file_name, \"testvectors/tile%0d_b0.dat\", glb_tile_id);", # noqa + "\t$sformat(b1_file_name, \"testvectors/tile%0d_b1.dat\", glb_tile_id);", # noqa + "\t$sformat(load_arg, \"LOAD%0d\", glb_tile_id);", + "\tif (($test$plusargs(load_arg))) begin", + "\t\t$readmemh(b0_file_name, glb_core.glb_bank_0.glb_bank_memory.glb_bank_sram_stub.mem);", # noqa + "\t\t$readmemh(b1_file_name, glb_core.glb_bank_1.glb_bank_memory.glb_bank_sram_stub.mem);", # noqa 
+ "\tend", + "end"]) + self.writememh_block = RawStringStmt(["final begin", + "\tstring b0_file_name;", + "\tstring b1_file_name;", + "\tstring save_arg;", + "\t$sformat(b0_file_name, \"testvectors/tile%0d_b0_out.dat\", glb_tile_id);", # noqa + "\t$sformat(b1_file_name, \"testvectors/tile%0d_b1_out.dat\", glb_tile_id);", # noqa + "\t$sformat(save_arg, \"SAVE%0d\", glb_tile_id);", + "\tif (($test$plusargs(save_arg))) begin", + "\t\t$writememh(b0_file_name, glb_core.glb_bank_0.glb_bank_memory.glb_bank_sram_stub.mem);", # noqa + "\t\t$writememh(b1_file_name, glb_core.glb_bank_1.glb_bank_memory.glb_bank_sram_stub.mem);", # noqa + "\tend", + "end"]) + self.add_stmt(self.readmemh_block.stmt()) + self.add_stmt(self.writememh_block.stmt()) + + self.pcfg_wiring() + + def struct_wiring(self): + self.wr_packet_sw2bankarr = self.var( + "wr_packet_sw2bankarr", self.header.wr_bank_packet_t, size=self._params.banks_per_tile) + self.rdrq_packet_sw2bankarr = self.var( + "rdrq_packet_sw2bankarr", self.header.rdrq_bank_packet_t, size=self._params.banks_per_tile) + self.rdrs_packet_bankarr2sw = self.var( + "rdrs_packet_bankarr2sw", self.header.rdrs_packet_t, size=self._params.banks_per_tile) + + self.cgra_cfg_pcfgdma2mux = self.var("cgra_cfg_pcfgdma2mux", self.header.cgra_cfg_t) + + self.wr_packet_procsw2bank = self.var("wr_packet_procsw2bank", self.header.wr_packet_t) + self.wr_packet_ring2bank = self.var("wr_packet_ring2bank", self.header.wr_packet_t) + self.wr_packet_dma2ring = self.var("wr_packet_dma2ring", self.header.wr_packet_t) + self.wr_packet_dma2bank = self.var("wr_packet_dma2bank", self.header.wr_packet_t) + + self.rdrq_packet_procsw2bank = self.var("rdrq_packet_procsw2bank", self.header.rdrq_packet_t) + self.rdrq_packet_ring2bank = self.var("rdrq_packet_ring2bank", self.header.rdrq_packet_t) + self.rdrq_packet_dma2ring = self.var("rdrq_packet_dma2ring", self.header.rdrq_packet_t) + self.rdrq_packet_dma2bank = self.var("rdrq_packet_dma2bank", self.header.rdrq_packet_t) + 
self.rdrq_packet_pcfgdma2bank = self.var("rdrq_packet_pcfgdma2bank", self.header.rdrq_packet_t) + self.rdrq_packet_pcfgring2bank = self.var("rdrq_packet_pcfgring2bank", self.header.rdrq_packet_t) + self.rdrq_packet_pcfgdma2ring = self.var("rdrq_packet_pcfgdma2ring", self.header.rdrq_packet_t) + + self.rdrs_packet_bank2procsw = self.var("rdrs_packet_bank2procsw", self.header.rdrs_packet_t) + self.rdrs_packet_bank2ring = self.var("rdrs_packet_bank2ring", self.header.rdrs_packet_t) + self.rdrs_packet_ring2dma = self.var("rdrs_packet_ring2dma", self.header.rdrs_packet_t) + self.rdrs_packet_bank2dma = self.var("rdrs_packet_bank2dma", self.header.rdrs_packet_t) + self.rdrs_packet_pcfgring2dma = self.var("rdrs_packet_pcfgring2dma", self.header.rdrs_packet_t) + self.rdrs_packet_bank2pcfgring = self.var("rdrs_packet_bank2pcfgring", self.header.rdrs_packet_t) + self.rdrs_packet_bank2pcfgdma = self.var("rdrs_packet_bank2pcfgdma", self.header.rdrs_packet_t) + + self.strm_wr_packet_w2e_wsti = self.var("strm_wr_packet_w2e_wsti", self.header.wr_packet_t) + self.strm_wr_packet_e2w_wsto = self.var("strm_wr_packet_e2w_wsto", self.header.wr_packet_t) + self.strm_wr_packet_e2w_esti = self.var("strm_wr_packet_e2w_esti", self.header.wr_packet_t) + self.strm_wr_packet_w2e_esto = self.var("strm_wr_packet_w2e_esto", self.header.wr_packet_t) + + self.strm_rdrq_packet_w2e_wsti = self.var("strm_rdrq_packet_w2e_wsti", self.header.rdrq_packet_t) + self.strm_rdrq_packet_e2w_wsto = self.var("strm_rdrq_packet_e2w_wsto", self.header.rdrq_packet_t) + self.strm_rdrq_packet_e2w_esti = self.var("strm_rdrq_packet_e2w_esti", self.header.rdrq_packet_t) + self.strm_rdrq_packet_w2e_esto = self.var("strm_rdrq_packet_w2e_esto", self.header.rdrq_packet_t) + + self.strm_rdrs_packet_w2e_wsti = self.var("strm_rdrs_packet_w2e_wsti", self.header.rdrs_packet_t) + self.strm_rdrs_packet_e2w_wsto = self.var("strm_rdrs_packet_e2w_wsto", self.header.rdrs_packet_t) + self.strm_rdrs_packet_e2w_esti = 
self.var("strm_rdrs_packet_e2w_esti", self.header.rdrs_packet_t) + self.strm_rdrs_packet_w2e_esto = self.var("strm_rdrs_packet_w2e_esto", self.header.rdrs_packet_t) + + self.pcfg_rdrq_packet_w2e_wsti = self.var("pcfg_rdrq_packet_w2e_wsti", self.header.rdrq_packet_t) + self.pcfg_rdrq_packet_e2w_wsto = self.var("pcfg_rdrq_packet_e2w_wsto", self.header.rdrq_packet_t) + self.pcfg_rdrq_packet_e2w_esti = self.var("pcfg_rdrq_packet_e2w_esti", self.header.rdrq_packet_t) + self.pcfg_rdrq_packet_w2e_esto = self.var("pcfg_rdrq_packet_w2e_esto", self.header.rdrq_packet_t) + + self.pcfg_rdrs_packet_w2e_wsti = self.var("pcfg_rdrs_packet_w2e_wsti", self.header.rdrs_packet_t) + self.pcfg_rdrs_packet_e2w_wsto = self.var("pcfg_rdrs_packet_e2w_wsto", self.header.rdrs_packet_t) + self.pcfg_rdrs_packet_e2w_esti = self.var("pcfg_rdrs_packet_e2w_esti", self.header.rdrs_packet_t) + self.pcfg_rdrs_packet_w2e_esto = self.var("pcfg_rdrs_packet_w2e_esto", self.header.rdrs_packet_t) + + for port, _ in self.header.wr_packet_ports: + self.wire(self.strm_wr_packet_w2e_wsti[port], self.strm_w2e_wsti_dict[port]) + for port, _ in self.header.wr_packet_ports: + self.wire(self.strm_wr_packet_w2e_esto[port], self.strm_w2e_esto_dict[port]) + for port, _ in self.header.wr_packet_ports: + self.wire(self.strm_wr_packet_e2w_esti[port], self.strm_e2w_esti_dict[port]) + for port, _ in self.header.wr_packet_ports: + self.wire(self.strm_wr_packet_e2w_wsto[port], self.strm_e2w_wsto_dict[port]) + + for port, _ in self.header.rdrq_packet_ports: + self.wire(self.strm_rdrq_packet_w2e_wsti[port], self.strm_w2e_wsti_dict[port]) + for port, _ in self.header.rdrq_packet_ports: + self.wire(self.strm_rdrq_packet_w2e_esto[port], self.strm_w2e_esto_dict[port]) + for port, _ in self.header.rdrq_packet_ports: + self.wire(self.strm_rdrq_packet_e2w_esti[port], self.strm_e2w_esti_dict[port]) + for port, _ in self.header.rdrq_packet_ports: + self.wire(self.strm_rdrq_packet_e2w_wsto[port], self.strm_e2w_wsto_dict[port]) + + for 
port, _ in self.header.rdrs_packet_ports: + self.wire(self.strm_rdrs_packet_e2w_wsto[port], self.strm_e2w_wsto_dict[port]) + for port, _ in self.header.rdrs_packet_ports: + self.wire(self.strm_rdrs_packet_e2w_esti[port], self.strm_e2w_esti_dict[port]) + for port, _ in self.header.rdrs_packet_ports: + self.wire(self.strm_rdrs_packet_w2e_wsti[port], self.strm_w2e_wsti_dict[port]) + for port, _ in self.header.rdrs_packet_ports: + self.wire(self.strm_rdrs_packet_w2e_esto[port], self.strm_w2e_esto_dict[port]) + + for port, _ in self.header.rdrq_packet_ports: + self.wire(self.pcfg_rdrq_packet_w2e_wsti[port], self.pcfg_w2e_wsti_dict[port]) + for port, _ in self.header.rdrq_packet_ports: + self.wire(self.pcfg_rdrq_packet_w2e_esto[port], self.pcfg_w2e_esto_dict[port]) + for port, _ in self.header.rdrq_packet_ports: + self.wire(self.pcfg_rdrq_packet_e2w_esti[port], self.pcfg_e2w_esti_dict[port]) + for port, _ in self.header.rdrq_packet_ports: + self.wire(self.pcfg_rdrq_packet_e2w_wsto[port], self.pcfg_e2w_wsto_dict[port]) + + for port, _ in self.header.rdrs_packet_ports: + self.wire(self.pcfg_rdrs_packet_e2w_wsto[port], self.pcfg_e2w_wsto_dict[port]) + for port, _ in self.header.rdrs_packet_ports: + self.wire(self.pcfg_rdrs_packet_e2w_esti[port], self.pcfg_e2w_esti_dict[port]) + for port, _ in self.header.rdrs_packet_ports: + self.wire(self.pcfg_rdrs_packet_w2e_wsti[port], self.pcfg_w2e_wsti_dict[port]) + for port, _ in self.header.rdrs_packet_ports: + self.wire(self.pcfg_rdrs_packet_w2e_esto[port], self.pcfg_w2e_esto_dict[port]) + + def pcfg_wiring(self): + cgra_cfg_g2f_w = self.var(f"cgra_cfg_g2f_cfg_w", self.header.cgra_cfg_t, + size=self._params.cgra_per_glb, packed=True) + self.wire(self.glb_pcfg_broadcast.cgra_cfg_g2f, cgra_cfg_g2f_w) + for i in range(self._params.cgra_per_glb): + self.wire(cgra_cfg_g2f_w[i]['wr_en'], self.cgra_cfg_g2f_cfg_wr_en[i]) + self.wire(cgra_cfg_g2f_w[i]['rd_en'], self.cgra_cfg_g2f_cfg_rd_en[i]) + self.wire(cgra_cfg_g2f_w[i]['addr'], 
self.cgra_cfg_g2f_cfg_addr[i]) + self.wire(cgra_cfg_g2f_w[i]['data'], self.cgra_cfg_g2f_cfg_data[i]) + + self.wire(self.glb_pcfg_broadcast.cgra_cfg_jtag_wsti['wr_en'], self.cgra_cfg_jtag_wr_en_wsti) + self.wire(self.glb_pcfg_broadcast.cgra_cfg_jtag_wsti['rd_en'], self.cgra_cfg_jtag_rd_en_wsti) + self.wire(self.glb_pcfg_broadcast.cgra_cfg_jtag_wsti['addr'], self.cgra_cfg_jtag_addr_wsti) + self.wire(self.glb_pcfg_broadcast.cgra_cfg_jtag_wsti['data'], self.cgra_cfg_jtag_data_wsti) + self.wire(self.glb_pcfg_broadcast.cgra_cfg_jtag_esto['wr_en'], self.cgra_cfg_jtag_wr_en_esto) + self.wire(self.glb_pcfg_broadcast.cgra_cfg_jtag_esto['rd_en'], self.cgra_cfg_jtag_rd_en_esto) + self.wire(self.glb_pcfg_broadcast.cgra_cfg_jtag_esto['addr'], self.cgra_cfg_jtag_addr_esto) + self.wire(self.glb_pcfg_broadcast.cgra_cfg_jtag_esto['data'], self.cgra_cfg_jtag_data_esto) + + self.wire(self.glb_pcfg_broadcast.cgra_cfg_pcfg_wsti['wr_en'], self.cgra_cfg_pcfg_wr_en_w2e_wsti) + self.wire(self.glb_pcfg_broadcast.cgra_cfg_pcfg_wsti['rd_en'], self.cgra_cfg_pcfg_rd_en_w2e_wsti) + self.wire(self.glb_pcfg_broadcast.cgra_cfg_pcfg_wsti['addr'], self.cgra_cfg_pcfg_addr_w2e_wsti) + self.wire(self.glb_pcfg_broadcast.cgra_cfg_pcfg_wsti['data'], self.cgra_cfg_pcfg_data_w2e_wsti) + self.wire(self.glb_pcfg_broadcast.cgra_cfg_pcfg_esto['wr_en'], self.cgra_cfg_pcfg_wr_en_w2e_esto) + self.wire(self.glb_pcfg_broadcast.cgra_cfg_pcfg_esto['rd_en'], self.cgra_cfg_pcfg_rd_en_w2e_esto) + self.wire(self.glb_pcfg_broadcast.cgra_cfg_pcfg_esto['addr'], self.cgra_cfg_pcfg_addr_w2e_esto) + self.wire(self.glb_pcfg_broadcast.cgra_cfg_pcfg_esto['data'], self.cgra_cfg_pcfg_data_w2e_esto) + + self.wire(self.glb_pcfg_broadcast.cgra_cfg_pcfg_esti['wr_en'], self.cgra_cfg_pcfg_wr_en_e2w_esti) + self.wire(self.glb_pcfg_broadcast.cgra_cfg_pcfg_esti['rd_en'], self.cgra_cfg_pcfg_rd_en_e2w_esti) + self.wire(self.glb_pcfg_broadcast.cgra_cfg_pcfg_esti['addr'], self.cgra_cfg_pcfg_addr_e2w_esti) + 
self.wire(self.glb_pcfg_broadcast.cgra_cfg_pcfg_esti['data'], self.cgra_cfg_pcfg_data_e2w_esti) + self.wire(self.glb_pcfg_broadcast.cgra_cfg_pcfg_wsto['wr_en'], self.cgra_cfg_pcfg_wr_en_e2w_wsto) + self.wire(self.glb_pcfg_broadcast.cgra_cfg_pcfg_wsto['rd_en'], self.cgra_cfg_pcfg_rd_en_e2w_wsto) + self.wire(self.glb_pcfg_broadcast.cgra_cfg_pcfg_wsto['addr'], self.cgra_cfg_pcfg_addr_e2w_wsto) + self.wire(self.glb_pcfg_broadcast.cgra_cfg_pcfg_wsto['data'], self.cgra_cfg_pcfg_data_e2w_wsto) + + self.wire(self.glb_pcfg_broadcast.cgra_cfg_jtag_rd_en_bypass_wsti, self.cgra_cfg_jtag_rd_en_bypass_wsti) + self.wire(self.glb_pcfg_broadcast.cgra_cfg_jtag_rd_en_bypass_esto, self.cgra_cfg_jtag_rd_en_bypass_esto) + self.wire(self.glb_pcfg_broadcast.cgra_cfg_jtag_addr_bypass_wsti, self.cgra_cfg_jtag_addr_bypass_wsti) + self.wire(self.glb_pcfg_broadcast.cgra_cfg_jtag_addr_bypass_esto, self.cgra_cfg_jtag_addr_bypass_esto) From 8941795aa56c132313caf2a9fb132b4cb903a36c Mon Sep 17 00:00:00 2001 From: steveri Date: Wed, 4 Jan 2023 13:32:50 -0800 Subject: [PATCH 60/63] use design_amber for global_buffer.py diffs instead of if-then-else --- global_buffer/design/global_buffer.py | 352 ++++---------------------- 1 file changed, 45 insertions(+), 307 deletions(-) diff --git a/global_buffer/design/global_buffer.py b/global_buffer/design/global_buffer.py index 2cab184d64..07988fbe30 100644 --- a/global_buffer/design/global_buffer.py +++ b/global_buffer/design/global_buffer.py @@ -1,4 +1,3 @@ -import os from kratos import Generator, always_ff, posedge, always_comb, clock_en, clog2, const, concat, resize from kratos.util import to_magma from global_buffer.design.glb_tile import GlbTile @@ -24,11 +23,8 @@ def __init__(self, _params: GlobalBufferParams): self.flush_crossbar_sel = self.input("flush_crossbar_sel", clog2( self._params.num_glb_tiles) * self._params.num_groups) self.reset = self.reset("reset") - if os.getenv('WHICH_SOC') == "amber": - pass - else: - self.cgra_stall_in = 
self.input("cgra_stall_in", self._params.num_cgra_cols) - self.cgra_stall = self.output("cgra_stall", self._params.num_cgra_cols) + self.cgra_stall_in = self.input("cgra_stall_in", self._params.num_cgra_cols) + self.cgra_stall = self.output("cgra_stall", self._params.num_cgra_cols) self.proc_wr_en = self.input("proc_wr_en", 1) self.proc_wr_strb = self.input("proc_wr_strb", self._params.bank_strb_width) @@ -64,32 +60,21 @@ def __init__(self, _params: GlobalBufferParams): self.strm_data_f2g = self.input("strm_data_f2g", self._params.cgra_data_width, size=[ self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) - - if os.getenv('WHICH_SOC') == "amber": - self.strm_data_valid_f2g = self.input("strm_data_valid_f2g", 1, size=[ - self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) - else: - self.strm_data_f2g_vld = self.input("strm_data_f2g_vld", 1, size=[ - self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) - self.strm_data_f2g_rdy = self.output("strm_data_f2g_rdy", 1, size=[ - self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) - self.strm_ctrl_f2g = self.input("strm_ctrl_f2g", 1, size=[ - self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) + self.strm_data_f2g_vld = self.input("strm_data_f2g_vld", 1, size=[ + self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) + self.strm_data_f2g_rdy = self.output("strm_data_f2g_rdy", 1, size=[ + self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) + self.strm_ctrl_f2g = self.input("strm_ctrl_f2g", 1, size=[ + self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) self.strm_data_g2f = self.output("strm_data_g2f", self._params.cgra_data_width, size=[ self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) - - if os.getenv('WHICH_SOC') == "amber": - self.strm_data_valid_g2f = self.output("strm_data_valid_g2f", 1, size=[ - self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) - 
else: - self.strm_data_g2f_vld = self.output("strm_data_g2f_vld", 1, size=[ - self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) - self.strm_data_g2f_rdy = self.input("strm_data_g2f_rdy", 1, size=[ - self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) - self.strm_ctrl_g2f = self.output("strm_ctrl_g2f", 1, size=[ - self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) - + self.strm_data_g2f_vld = self.output("strm_data_g2f_vld", 1, size=[ + self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) + self.strm_data_g2f_rdy = self.input("strm_data_g2f_rdy", 1, size=[ + self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) + self.strm_ctrl_g2f = self.output("strm_ctrl_g2f", 1, size=[ + self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) self.strm_data_flush_g2f = self.output("strm_data_flush_g2f", 1, size=self._params.num_groups, packed=True) self.cgra_cfg_g2f_cfg_wr_en = self.output("cgra_cfg_g2f_cfg_wr_en", 1, size=[ @@ -113,12 +98,8 @@ def __init__(self, _params: GlobalBufferParams): self.bank_msb_data_width = self._params.bank_data_width - self._params.axi_data_width # local variables - if os.getenv('WHICH_SOC') == "amber": - self.data_flush = self.var("data_flush", 1, size=self._params.num_glb_tiles, packed=True) - else: - self.data_flush = self.var("data_flush", self._params.num_glb_tiles) - self.data_flush_d = self.var("data_flush_d", self._params.num_glb_tiles) - + self.data_flush = self.var("data_flush", self._params.num_glb_tiles) + self.data_flush_d = self.var("data_flush_d", self._params.num_glb_tiles) self.proc_rd_type_e = self.enum("proc_rd_type_e", {"axi": 0, "jtag": 1}) self.proc_rd_type = self.var("proc_rd_type", self.proc_rd_type_e) self.proc_rd_addr_sel = self.var("proc_rd_addr_sel", 1) @@ -128,11 +109,8 @@ def __init__(self, _params: GlobalBufferParams): self.proc_wr_data_d = self.var("proc_wr_data_d", self._params.bank_data_width) self.proc_rd_en_d = 
self.var("proc_rd_en_d", 1) self.proc_rd_addr_d = self.var("proc_rd_addr_d", self._params.glb_addr_width) - if os.getenv('WHICH_SOC') == "amber": - pass - else: - self.proc_rd_data_w = self.var("proc_rd_data_w", self._params.bank_data_width) - self.proc_rd_data_valid_w = self.var("proc_rd_data_valid_w", 1) + self.proc_rd_data_w = self.var("proc_rd_data_w", self._params.bank_data_width) + self.proc_rd_data_valid_w = self.var("proc_rd_data_valid_w", 1) self.sram_cfg_wr_en_d = self.var("sram_cfg_wr_en_d", 1) self.sram_cfg_wr_strb_d = self.var("sram_cfg_wr_strb_d", self._params.bank_strb_width) @@ -140,11 +118,8 @@ def __init__(self, _params: GlobalBufferParams): self.sram_cfg_wr_data_d = self.var("sram_cfg_wr_data_d", self._params.bank_data_width) self.sram_cfg_rd_en_d = self.var("sram_cfg_rd_en_d", 1) self.sram_cfg_rd_addr_d = self.var("sram_cfg_rd_addr_d", self._params.glb_addr_width) - if os.getenv('WHICH_SOC') == "amber": - pass - else: - self.if_sram_cfg_rd_data_w = self.var("if_sram_cfg_rd_data_w", self._params.axi_data_width) - self.if_sram_cfg_rd_data_valid_w = self.var("if_sram_cfg_rd_data_valid_w", 1) + self.if_sram_cfg_rd_data_w = self.var("if_sram_cfg_rd_data_w", self._params.axi_data_width) + self.if_sram_cfg_rd_data_valid_w = self.var("if_sram_cfg_rd_data_valid_w", 1) self.cgra_cfg_jtag_gc2glb_wr_en_d = self.var("cgra_cfg_jtag_gc2glb_wr_en_d", 1) self.cgra_cfg_jtag_gc2glb_rd_en_d = self.var("cgra_cfg_jtag_gc2glb_rd_en_d", 1) @@ -270,25 +245,8 @@ def __init__(self, _params: GlobalBufferParams): self.if_sram_cfg_list.append(self.interface( if_sram_cfg_tile2tile, f"if_sram_cfg_tile2tile_{i}")) - if os.getenv('WHICH_SOC') == "amber": - # GLS pipeline - self.strm_g2f_start_pulse_d = self.var("strm_g2f_start_pulse_d", self._params.num_glb_tiles) - self.strm_f2g_start_pulse_d = self.var("strm_f2g_start_pulse_d", self._params.num_glb_tiles) - self.pcfg_start_pulse_d = self.var("pcfg_start_pulse_d", self._params.num_glb_tiles) - self.gls_in = 
concat(self.strm_g2f_start_pulse, self.strm_f2g_start_pulse, self.pcfg_start_pulse) - self.gls_out = concat(self.strm_g2f_start_pulse_d, self.strm_f2g_start_pulse_d, self.pcfg_start_pulse_d) - - self.gls_pipeline = Pipeline(width=self.gls_in.width, depth=self._params.gls_pipeline_depth) - self.add_child("gls_pipeline", - self.gls_pipeline, - clk=self.clk, - clk_en=const(1, 1), - reset=self.reset, - in_=self.gls_in, - out_=self.gls_out) - else: - # Passthrough cgar_stall signals - self.wire(self.cgra_stall_in, self.cgra_stall) + # Passthrough cgar_stall signals + self.wire(self.cgra_stall_in, self.cgra_stall) # GLB Tiles self.glb_tile = [] @@ -307,13 +265,8 @@ def __init__(self, _params: GlobalBufferParams): self.add_always(self.sram_cfg_pipeline) self.add_always(self.left_edge_proc_wr_ff) self.add_always(self.left_edge_proc_rd_in_ff) - if os.getenv('WHICH_SOC') == "amber": - # self.add_always(self.left_edge_proc_rd_ff) => name changed to 'left_edge_proc_rd_in_ff' - self.add_always(self.left_edge_proc_rd_out) - else: - self.add_always(self.left_edge_proc_rd_out_logic) - self.add_always(self.left_edge_proc_rd_out_ff) - + self.add_always(self.left_edge_proc_rd_out_logic) + self.add_always(self.left_edge_proc_rd_out_ff) self.add_proc_clk_en() self.add_always(self.left_edge_cfg_ff) self.add_always(self.left_edge_cgra_cfg_ff) @@ -327,28 +280,21 @@ def __init__(self, _params: GlobalBufferParams): self.wire(self.if_cfg_rd_data, self.if_cfg_list[0].rd_data) self.wire(self.if_cfg_rd_data_valid, self.if_cfg_list[0].rd_data_valid) - if os.getenv('WHICH_SOC') == "amber": - pass - else: - # Add flush signal pipeline - self.flush_pipeline = Pipeline(width=self.data_flush.width, - depth=self._params.flush_crossbar_pipeline_depth) - self.add_child("flush_pipeline", - self.flush_pipeline, - clk=self.clk, - clk_en=const(1, 1), - reset=self.reset, - in_=self.data_flush, - out_=self.data_flush_d) + # Add flush signal pipeline + self.flush_pipeline = 
Pipeline(width=self.data_flush.width, + depth=self._params.flush_crossbar_pipeline_depth) + self.add_child("flush_pipeline", + self.flush_pipeline, + clk=self.clk, + clk_en=const(1, 1), + reset=self.reset, + in_=self.data_flush, + out_=self.data_flush_d) # Add flush signal crossbar - if os.getenv('WHICH_SOC') == "amber": - pass - else: - flush_crossbar_in = self.var("flush_crossbar_in", 1, size=self._params.num_glb_tiles, packed=True) - for i in range(self._params.num_glb_tiles): - self.wire(flush_crossbar_in[i], self.data_flush_d[i]) - + flush_crossbar_in = self.var("flush_crossbar_in", 1, size=self._params.num_glb_tiles, packed=True) + for i in range(self._params.num_glb_tiles): + self.wire(flush_crossbar_in[i], self.data_flush_d[i]) self.flush_crossbar = GlbCrossbar(width=1, num_input=self._params.num_glb_tiles, num_output=self._params.num_groups) self.flush_crossbar_sel_w = self.var("flush_crossbar_sel_w", clog2(self._params.num_glb_tiles), @@ -357,18 +303,11 @@ def __init__(self, _params: GlobalBufferParams): self.wire(self.flush_crossbar_sel_w[i], self.flush_crossbar_sel[(i + 1) * clog2(self._params.num_glb_tiles) - 1, i * clog2(self._params.num_glb_tiles)]) - if os.getenv('WHICH_SOC') == "amber": - self.add_child("flush_crossbar", - self.flush_crossbar, - in_=self.data_flush, - sel_=self.flush_crossbar_sel_w, - out_=self.strm_data_flush_g2f) - else: - self.add_child("flush_crossbar", - self.flush_crossbar, - in_=flush_crossbar_in, - sel_=self.flush_crossbar_sel_w, - out_=self.strm_data_flush_g2f) + self.add_child("flush_crossbar", + self.flush_crossbar, + in_=flush_crossbar_in, + sel_=self.flush_crossbar_sel_w, + out_=self.strm_data_flush_g2f) @always_ff((posedge, "clk"), (posedge, "reset")) def proc_pipeline(self): @@ -427,10 +366,8 @@ def add_proc_clk_en(self): self.wire(self.if_proc_list[0].wr_clk_en, self.proc_wr_clk_en) self.rd_clk_en_gen = GlbClkEnGen(cnt=2 * self._params.num_glb_tiles + self._params.tile2sram_rd_delay + 
self._params.proc_clk_en_margin) - if os.getenv('WHICH_SOC') == "amber": pass - else: - self.rd_clk_en_gen.p_cnt.value = 2 * self._params.num_glb_tiles + \ - self._params.tile2sram_rd_delay + self._params.proc_clk_en_margin + self.rd_clk_en_gen.p_cnt.value = 2 * self._params.num_glb_tiles + \ + self._params.tile2sram_rd_delay + self._params.proc_clk_en_margin self.proc_rd_clk_en = self.var("proc_rd_clk_en", 1) self.add_child("proc_rd_clk_en_gen", self.rd_clk_en_gen, @@ -489,7 +426,6 @@ def left_edge_proc_rd_in_ff(self): self.proc_rd_type = self.proc_rd_type self.proc_rd_addr_sel = self.proc_rd_addr_sel - # Only used by onyx @always_comb def left_edge_proc_rd_out_logic(self): if self.proc_rd_type == self.proc_rd_type_e.axi: @@ -512,7 +448,6 @@ def left_edge_proc_rd_out_logic(self): self.if_sram_cfg_rd_data_w = 0 self.if_sram_cfg_rd_data_valid_w = 0 - # Only used by onyx @always_ff((posedge, "clk"), (posedge, "reset")) def left_edge_proc_rd_out_ff(self): if self.reset: @@ -526,30 +461,6 @@ def left_edge_proc_rd_out_ff(self): self.if_sram_cfg_rd_data = self.if_sram_cfg_rd_data_w self.if_sram_cfg_rd_data_valid = self.if_sram_cfg_rd_data_valid_w - # amber uses a different version - if os.getenv('WHICH_SOC') == "amber": - @ always_comb - def left_edge_proc_rd_out(self): - if self.proc_rd_type == self.proc_rd_type_e.axi: - self.proc_rd_data = self.if_proc_list[0].rd_data - self.proc_rd_data_valid = self.if_proc_list[0].rd_data_valid - self.if_sram_cfg_rd_data = 0 - self.if_sram_cfg_rd_data_valid = 0 - elif self.proc_rd_type == self.proc_rd_type_e.jtag: - self.proc_rd_data = 0 - self.proc_rd_data_valid = 0 - if self.proc_rd_addr_sel == 0: - self.if_sram_cfg_rd_data = self.if_proc_list[0].rd_data[self._params.axi_data_width - 1, 0] - else: - self.if_sram_cfg_rd_data = self.if_proc_list[0].rd_data[self._params.axi_data_width - * 2 - 1, self._params.axi_data_width] - self.if_sram_cfg_rd_data_valid = self.if_proc_list[0].rd_data_valid - else: - self.proc_rd_data = 
self.if_proc_list[0].rd_data - self.proc_rd_data_valid = self.if_proc_list[0].rd_data_valid - self.if_sram_cfg_rd_data = 0 - self.if_sram_cfg_rd_data_valid = 0 - @ always_ff((posedge, "clk"), (posedge, "reset")) def left_edge_cfg_ff(self): if self.reset: @@ -641,180 +552,7 @@ def tile2tile_w2e_cfg_wiring(self): self.cgra_cfg_pcfg_addr_w2e_wsti[i] = self.cgra_cfg_pcfg_addr_w2e_esto[i - 1] self.cgra_cfg_pcfg_data_w2e_wsti[i] = self.cgra_cfg_pcfg_data_w2e_esto[i - 1] - if os.getenv('WHICH_SOC') == "amber": - def add_glb_tile(self): - for i in range(self._params.num_glb_tiles): - self.add_child(f"glb_tile_gen_{i}", - self.glb_tile[i], - clk=self.clk, - clk_en_pcfg_broadcast=clock_en(~self.pcfg_broadcast_stall[i]), - clk_en_master=clock_en(self.glb_clk_en_master[i]), - clk_en_bank_master=clock_en(self.glb_clk_en_bank_master[i]), - reset=self.reset, - glb_tile_id=i, - - # proc - if_proc_est_m_wr_en=self.if_proc_list[i + 1].wr_en, - if_proc_est_m_wr_clk_en=self.if_proc_list[i + 1].wr_clk_en, - if_proc_est_m_wr_addr=self.if_proc_list[i + 1].wr_addr, - if_proc_est_m_wr_data=self.if_proc_list[i + 1].wr_data, - if_proc_est_m_wr_strb=self.if_proc_list[i + 1].wr_strb, - if_proc_est_m_rd_en=self.if_proc_list[i + 1].rd_en, - if_proc_est_m_rd_clk_en=self.if_proc_list[i + 1].rd_clk_en, - if_proc_est_m_rd_addr=self.if_proc_list[i + 1].rd_addr, - if_proc_est_m_rd_data=self.if_proc_list[i + 1].rd_data, - if_proc_est_m_rd_data_valid=self.if_proc_list[i + 1].rd_data_valid, - - if_proc_wst_s_wr_en=self.if_proc_list[i].wr_en, - if_proc_wst_s_wr_clk_en=self.if_proc_list[i].wr_clk_en, - if_proc_wst_s_wr_addr=self.if_proc_list[i].wr_addr, - if_proc_wst_s_wr_data=self.if_proc_list[i].wr_data, - if_proc_wst_s_wr_strb=self.if_proc_list[i].wr_strb, - if_proc_wst_s_rd_en=self.if_proc_list[i].rd_en, - if_proc_wst_s_rd_clk_en=self.if_proc_list[i].rd_clk_en, - if_proc_wst_s_rd_addr=self.if_proc_list[i].rd_addr, - if_proc_wst_s_rd_data=self.if_proc_list[i].rd_data, - 
if_proc_wst_s_rd_data_valid=self.if_proc_list[i].rd_data_valid, - - # strm - strm_wr_en_e2w_esti=self.strm_packet_e2w_esti[i]["wr"]['wr_en'], - strm_wr_strb_e2w_esti=self.strm_packet_e2w_esti[i]["wr"]['wr_strb'], - strm_wr_addr_e2w_esti=self.strm_packet_e2w_esti[i]["wr"]['wr_addr'], - strm_wr_data_e2w_esti=self.strm_packet_e2w_esti[i]["wr"]['wr_data'], - strm_rd_en_e2w_esti=self.strm_packet_e2w_esti[i]["rdrq"]['rd_en'], - strm_rd_addr_e2w_esti=self.strm_packet_e2w_esti[i]["rdrq"]['rd_addr'], - strm_rd_data_e2w_esti=self.strm_packet_e2w_esti[i]["rdrs"]['rd_data'], - strm_rd_data_valid_e2w_esti=self.strm_packet_e2w_esti[i]["rdrs"]['rd_data_valid'], - - strm_wr_en_w2e_esto=self.strm_packet_w2e_esto[i]["wr"]['wr_en'], - strm_wr_strb_w2e_esto=self.strm_packet_w2e_esto[i]["wr"]['wr_strb'], - strm_wr_addr_w2e_esto=self.strm_packet_w2e_esto[i]["wr"]['wr_addr'], - strm_wr_data_w2e_esto=self.strm_packet_w2e_esto[i]["wr"]['wr_data'], - strm_rd_en_w2e_esto=self.strm_packet_w2e_esto[i]["rdrq"]['rd_en'], - strm_rd_addr_w2e_esto=self.strm_packet_w2e_esto[i]["rdrq"]['rd_addr'], - strm_rd_data_w2e_esto=self.strm_packet_w2e_esto[i]["rdrs"]['rd_data'], - strm_rd_data_valid_w2e_esto=self.strm_packet_w2e_esto[i]["rdrs"]['rd_data_valid'], - - strm_wr_en_w2e_wsti=self.strm_packet_w2e_wsti[i]["wr"]['wr_en'], - strm_wr_strb_w2e_wsti=self.strm_packet_w2e_wsti[i]["wr"]['wr_strb'], - strm_wr_addr_w2e_wsti=self.strm_packet_w2e_wsti[i]["wr"]['wr_addr'], - strm_wr_data_w2e_wsti=self.strm_packet_w2e_wsti[i]["wr"]['wr_data'], - strm_rd_en_w2e_wsti=self.strm_packet_w2e_wsti[i]["rdrq"]['rd_en'], - strm_rd_addr_w2e_wsti=self.strm_packet_w2e_wsti[i]["rdrq"]['rd_addr'], - strm_rd_data_w2e_wsti=self.strm_packet_w2e_wsti[i]["rdrs"]['rd_data'], - strm_rd_data_valid_w2e_wsti=self.strm_packet_w2e_wsti[i]["rdrs"]['rd_data_valid'], - - strm_wr_en_e2w_wsto=self.strm_packet_e2w_wsto[i]["wr"]['wr_en'], - strm_wr_strb_e2w_wsto=self.strm_packet_e2w_wsto[i]["wr"]['wr_strb'], - 
strm_wr_addr_e2w_wsto=self.strm_packet_e2w_wsto[i]["wr"]['wr_addr'], - strm_wr_data_e2w_wsto=self.strm_packet_e2w_wsto[i]["wr"]['wr_data'], - strm_rd_en_e2w_wsto=self.strm_packet_e2w_wsto[i]["rdrq"]['rd_en'], - strm_rd_addr_e2w_wsto=self.strm_packet_e2w_wsto[i]["rdrq"]['rd_addr'], - strm_rd_data_e2w_wsto=self.strm_packet_e2w_wsto[i]["rdrs"]['rd_data'], - strm_rd_data_valid_e2w_wsto=self.strm_packet_e2w_wsto[i]["rdrs"]['rd_data_valid'], - - # pcfg - pcfg_rd_en_e2w_esti=self.pcfg_packet_e2w_esti[i]["rdrq"]['rd_en'], - pcfg_rd_addr_e2w_esti=self.pcfg_packet_e2w_esti[i]["rdrq"]['rd_addr'], - pcfg_rd_data_e2w_esti=self.pcfg_packet_e2w_esti[i]["rdrs"]['rd_data'], - pcfg_rd_data_valid_e2w_esti=self.pcfg_packet_e2w_esti[i]["rdrs"]['rd_data_valid'], - - pcfg_rd_en_w2e_esto=self.pcfg_packet_w2e_esto[i]["rdrq"]['rd_en'], - pcfg_rd_addr_w2e_esto=self.pcfg_packet_w2e_esto[i]["rdrq"]['rd_addr'], - pcfg_rd_data_w2e_esto=self.pcfg_packet_w2e_esto[i]["rdrs"]['rd_data'], - pcfg_rd_data_valid_w2e_esto=self.pcfg_packet_w2e_esto[i]["rdrs"]['rd_data_valid'], - - pcfg_rd_en_w2e_wsti=self.pcfg_packet_w2e_wsti[i]["rdrq"]['rd_en'], - pcfg_rd_addr_w2e_wsti=self.pcfg_packet_w2e_wsti[i]["rdrq"]['rd_addr'], - pcfg_rd_data_w2e_wsti=self.pcfg_packet_w2e_wsti[i]["rdrs"]['rd_data'], - pcfg_rd_data_valid_w2e_wsti=self.pcfg_packet_w2e_wsti[i]["rdrs"]['rd_data_valid'], - - pcfg_rd_en_e2w_wsto=self.pcfg_packet_e2w_wsto[i]["rdrq"]['rd_en'], - pcfg_rd_addr_e2w_wsto=self.pcfg_packet_e2w_wsto[i]["rdrq"]['rd_addr'], - pcfg_rd_data_e2w_wsto=self.pcfg_packet_e2w_wsto[i]["rdrs"]['rd_data'], - pcfg_rd_data_valid_e2w_wsto=self.pcfg_packet_e2w_wsto[i]["rdrs"]['rd_data_valid'], - - # cfg - if_cfg_est_m_wr_en=self.if_cfg_list[i + 1].wr_en, - if_cfg_est_m_wr_clk_en=self.if_cfg_list[i + 1].wr_clk_en, - if_cfg_est_m_wr_addr=self.if_cfg_list[i + 1].wr_addr, - if_cfg_est_m_wr_data=self.if_cfg_list[i + 1].wr_data, - if_cfg_est_m_rd_en=self.if_cfg_list[i + 1].rd_en, - if_cfg_est_m_rd_clk_en=self.if_cfg_list[i + 
1].rd_clk_en, - if_cfg_est_m_rd_addr=self.if_cfg_list[i + 1].rd_addr, - if_cfg_est_m_rd_data=self.if_cfg_list[i + 1].rd_data, - if_cfg_est_m_rd_data_valid=self.if_cfg_list[i + 1].rd_data_valid, - - if_cfg_wst_s_wr_en=self.if_cfg_list[i].wr_en, - if_cfg_wst_s_wr_clk_en=self.if_cfg_list[i].wr_clk_en, - if_cfg_wst_s_wr_addr=self.if_cfg_list[i].wr_addr, - if_cfg_wst_s_wr_data=self.if_cfg_list[i].wr_data, - if_cfg_wst_s_rd_en=self.if_cfg_list[i].rd_en, - if_cfg_wst_s_rd_clk_en=self.if_cfg_list[i].rd_clk_en, - if_cfg_wst_s_rd_addr=self.if_cfg_list[i].rd_addr, - if_cfg_wst_s_rd_data=self.if_cfg_list[i].rd_data, - if_cfg_wst_s_rd_data_valid=self.if_cfg_list[i].rd_data_valid, - - cfg_tile_connected_wsti=self.cfg_tile_connected[i], - cfg_tile_connected_esto=self.cfg_tile_connected[i + 1], - cfg_pcfg_tile_connected_wsti=self.cfg_pcfg_tile_connected[i], - cfg_pcfg_tile_connected_esto=self.cfg_pcfg_tile_connected[i + 1], - - strm_data_f2g=self.strm_data_f2g[i], - strm_data_valid_f2g=self.strm_data_valid_f2g[i], - strm_data_g2f=self.strm_data_g2f[i], - strm_data_valid_g2f=self.strm_data_valid_g2f[i], - data_flush=self.data_flush[i], - - cgra_cfg_g2f_cfg_wr_en=self.cgra_cfg_g2f_cfg_wr_en[i], - cgra_cfg_g2f_cfg_rd_en=self.cgra_cfg_g2f_cfg_rd_en[i], - cgra_cfg_g2f_cfg_addr=self.cgra_cfg_g2f_cfg_addr[i], - cgra_cfg_g2f_cfg_data=self.cgra_cfg_g2f_cfg_data[i], - - cgra_cfg_pcfg_wr_en_w2e_wsti=self.cgra_cfg_pcfg_wr_en_w2e_wsti[i], - cgra_cfg_pcfg_rd_en_w2e_wsti=self.cgra_cfg_pcfg_rd_en_w2e_wsti[i], - cgra_cfg_pcfg_addr_w2e_wsti=self.cgra_cfg_pcfg_addr_w2e_wsti[i], - cgra_cfg_pcfg_data_w2e_wsti=self.cgra_cfg_pcfg_data_w2e_wsti[i], - - cgra_cfg_pcfg_wr_en_w2e_esto=self.cgra_cfg_pcfg_wr_en_w2e_esto[i], - cgra_cfg_pcfg_rd_en_w2e_esto=self.cgra_cfg_pcfg_rd_en_w2e_esto[i], - cgra_cfg_pcfg_addr_w2e_esto=self.cgra_cfg_pcfg_addr_w2e_esto[i], - cgra_cfg_pcfg_data_w2e_esto=self.cgra_cfg_pcfg_data_w2e_esto[i], - - cgra_cfg_pcfg_wr_en_e2w_esti=self.cgra_cfg_pcfg_wr_en_e2w_esti[i], - 
cgra_cfg_pcfg_rd_en_e2w_esti=self.cgra_cfg_pcfg_rd_en_e2w_esti[i], - cgra_cfg_pcfg_addr_e2w_esti=self.cgra_cfg_pcfg_addr_e2w_esti[i], - cgra_cfg_pcfg_data_e2w_esti=self.cgra_cfg_pcfg_data_e2w_esti[i], - - cgra_cfg_pcfg_wr_en_e2w_wsto=self.cgra_cfg_pcfg_wr_en_e2w_wsto[i], - cgra_cfg_pcfg_rd_en_e2w_wsto=self.cgra_cfg_pcfg_rd_en_e2w_wsto[i], - cgra_cfg_pcfg_addr_e2w_wsto=self.cgra_cfg_pcfg_addr_e2w_wsto[i], - cgra_cfg_pcfg_data_e2w_wsto=self.cgra_cfg_pcfg_data_e2w_wsto[i], - - cgra_cfg_jtag_wr_en_wsti=self.cgra_cfg_jtag_wr_en_wsti[i], - cgra_cfg_jtag_rd_en_wsti=self.cgra_cfg_jtag_rd_en_wsti[i], - cgra_cfg_jtag_addr_wsti=self.cgra_cfg_jtag_addr_wsti[i], - cgra_cfg_jtag_data_wsti=self.cgra_cfg_jtag_data_wsti[i], - - cgra_cfg_jtag_wr_en_esto=self.cgra_cfg_jtag_wr_en_esto[i], - cgra_cfg_jtag_rd_en_esto=self.cgra_cfg_jtag_rd_en_esto[i], - cgra_cfg_jtag_addr_esto=self.cgra_cfg_jtag_addr_esto[i], - cgra_cfg_jtag_data_esto=self.cgra_cfg_jtag_data_esto[i], - - cgra_cfg_jtag_rd_en_bypass_wsti=self.cgra_cfg_jtag_rd_en_bypass_wsti[i], - cgra_cfg_jtag_addr_bypass_wsti=self.cgra_cfg_jtag_addr_bypass_wsti[i], - cgra_cfg_jtag_rd_en_bypass_esto=self.cgra_cfg_jtag_rd_en_bypass_esto[i], - cgra_cfg_jtag_addr_bypass_esto=self.cgra_cfg_jtag_addr_bypass_esto[i], - - strm_g2f_start_pulse=self.strm_g2f_start_pulse_d[i], - strm_f2g_start_pulse=self.strm_f2g_start_pulse_d[i], - pcfg_start_pulse=self.pcfg_start_pulse_d[i], - strm_f2g_interrupt_pulse=self.strm_f2g_interrupt_pulse_w[i], - strm_g2f_interrupt_pulse=self.strm_g2f_interrupt_pulse_w[i], - pcfg_g2f_interrupt_pulse=self.pcfg_g2f_interrupt_pulse_w[i]) - - else: - def add_glb_tile(self): + def add_glb_tile(self): for i in range(self._params.num_glb_tiles): self.add_child(f"glb_tile_gen_{i}", self.glb_tile[i], From d780d1b9c9c936230d9593a0806d27c3a7ff16db Mon Sep 17 00:00:00 2001 From: steveri Date: Wed, 4 Jan 2023 13:50:53 -0800 Subject: [PATCH 61/63] use design_amber for global_buffer.py diffs instead of if-then-else --- 
global_buffer/global_buffer_main.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/global_buffer/global_buffer_main.py b/global_buffer/global_buffer_main.py index a7c2f6e410..a3d4421e26 100644 --- a/global_buffer/global_buffer_main.py +++ b/global_buffer/global_buffer_main.py @@ -1,12 +1,17 @@ import argparse -from global_buffer.design.global_buffer import GlobalBuffer + +if os.getenv('WHICH_SOC') == "amber": + from global_buffer.design_amber.global_buffer import GlobalBuffer +else: + from global_buffer.design.global_buffer import GlobalBuffer + + from global_buffer.design.global_buffer_parameter import gen_global_buffer_params, gen_header_files from systemRDL.util import gen_rdl_header import os import pathlib import kratos as k - def gen_param_header(top_name, params, output_folder): svh_filename = os.path.join(output_folder, f"{top_name}.svh") h_filename = os.path.join(output_folder, f"{top_name}.h") From 6147f4f9ff06c844988b634df143a4ece6fcbcf9 Mon Sep 17 00:00:00 2001 From: steveri Date: Fri, 6 Jan 2023 11:49:26 -0800 Subject: [PATCH 62/63] merge final few GLB files w/dense version(s) --- global_buffer/design/glb_switch.py | 12 +- global_buffer/design_amber/global_buffer.py | 714 ++++++++++++++++++++ global_buffer/global_buffer_main.py | 5 +- 3 files changed, 723 insertions(+), 8 deletions(-) create mode 100644 global_buffer/design_amber/global_buffer.py diff --git a/global_buffer/design/glb_switch.py b/global_buffer/design/glb_switch.py index f41d6c428f..2aae4ca62c 100644 --- a/global_buffer/design/glb_switch.py +++ b/global_buffer/design/glb_switch.py @@ -1,10 +1,10 @@ -from kratos import Generator, always_ff, always_comb, posedge, concat, const, clog2 +from kratos import Generator, always_ff, always_comb, posedge, concat, const from global_buffer.design.global_buffer_parameter import GlobalBufferParams from global_buffer.design.glb_tile_ifc import GlbTileInterface from global_buffer.design.glb_header import GlbHeader from 
global_buffer.design.pipeline import Pipeline from global_buffer.design.glb_clk_en_gen import GlbClkEnGen - +import os class GlbSwitch(Generator): def __init__(self, _params: GlobalBufferParams, ifc: GlbTileInterface): @@ -157,7 +157,9 @@ def est_m_rd_clk_en_mux(self): def add_sw2bank_clk_en(self): self.wr_clk_en_gen = GlbClkEnGen(cnt=self._params.tile2sram_wr_delay + self._params.wr_clk_en_margin) - self.wr_clk_en_gen.p_cnt.value = self._params.tile2sram_wr_delay + self._params.wr_clk_en_margin + if os.getenv('WHICH_SOC') == "amber": pass + else: + self.wr_clk_en_gen.p_cnt.value = self._params.tile2sram_wr_delay + self._params.wr_clk_en_margin self.sw2bank_wr_clk_en = self.var("sw2bank_wr_clk_en", 1) self.add_child("sw2bank_wr_clk_en_gen", self.wr_clk_en_gen, @@ -167,7 +169,9 @@ def add_sw2bank_clk_en(self): clk_en=self.sw2bank_wr_clk_en ) self.rd_clk_en_gen = GlbClkEnGen(cnt=self._params.tile2sram_rd_delay + self._params.rd_clk_en_margin) - self.rd_clk_en_gen.p_cnt.value = self._params.tile2sram_rd_delay + self._params.rd_clk_en_margin + if os.getenv('WHICH_SOC') == "amber": pass + else: + self.rd_clk_en_gen.p_cnt.value = self._params.tile2sram_rd_delay + self._params.rd_clk_en_margin self.sw2bank_rd_clk_en = self.var("sw2bank_rd_clk_en", 1) self.add_child("sw2bank_rd_clk_en_gen", self.rd_clk_en_gen, diff --git a/global_buffer/design_amber/global_buffer.py b/global_buffer/design_amber/global_buffer.py new file mode 100644 index 0000000000..5ef4fbac5b --- /dev/null +++ b/global_buffer/design_amber/global_buffer.py @@ -0,0 +1,714 @@ +from kratos import Generator, always_ff, posedge, always_comb, clock_en, clog2, const, concat +from kratos.util import to_magma +from global_buffer.design_amber.glb_tile import GlbTile +from global_buffer.design.glb_tile_ifc import GlbTileInterface +from global_buffer.design.global_buffer_parameter import GlobalBufferParams +from global_buffer.design.glb_header import GlbHeader +from global_buffer.design.pipeline import Pipeline +from 
global_buffer.design.glb_clk_en_gen import GlbClkEnGen +from global_buffer.design.glb_crossbar import GlbCrossbar +from gemstone.generator.from_magma import FromMagma + + +class GlobalBuffer(Generator): + def __init__(self, _params: GlobalBufferParams): + super().__init__("global_buffer") + self._params = _params + self.header = GlbHeader(self._params) + + self.clk = self.clock("clk") + self.glb_clk_en_master = self.input("glb_clk_en_master", self._params.num_glb_tiles) + self.glb_clk_en_bank_master = self.input("glb_clk_en_bank_master", self._params.num_glb_tiles) + self.pcfg_broadcast_stall = self.input("pcfg_broadcast_stall", self._params.num_glb_tiles) + self.flush_crossbar_sel = self.input("flush_crossbar_sel", clog2( + self._params.num_glb_tiles) * self._params.num_groups) + self.reset = self.reset("reset") + + self.proc_wr_en = self.input("proc_wr_en", 1) + self.proc_wr_strb = self.input("proc_wr_strb", self._params.bank_strb_width) + self.proc_wr_addr = self.input("proc_wr_addr", self._params.glb_addr_width) + self.proc_wr_data = self.input("proc_wr_data", self._params.bank_data_width) + self.proc_rd_en = self.input("proc_rd_en", 1) + self.proc_rd_addr = self.input("proc_rd_addr", self._params.glb_addr_width) + self.proc_rd_data = self.output("proc_rd_data", self._params.bank_data_width) + self.proc_rd_data_valid = self.output("proc_rd_data_valid", 1) + + self.if_cfg_wr_en = self.input("if_cfg_wr_en", 1) + self.if_cfg_wr_clk_en = self.input("if_cfg_wr_clk_en", 1) + self.if_cfg_wr_addr = self.input("if_cfg_wr_addr", self._params.axi_addr_width) + self.if_cfg_wr_data = self.input("if_cfg_wr_data", self._params.axi_data_width) + self.if_cfg_rd_en = self.input("if_cfg_rd_en", 1) + self.if_cfg_rd_clk_en = self.input("if_cfg_rd_clk_en", 1) + self.if_cfg_rd_addr = self.input("if_cfg_rd_addr", self._params.axi_addr_width) + self.if_cfg_rd_data = self.output("if_cfg_rd_data", self._params.axi_data_width) + self.if_cfg_rd_data_valid = 
self.output("if_cfg_rd_data_valid", 1) + + self.if_sram_cfg_wr_en = self.input("if_sram_cfg_wr_en", 1) + self.if_sram_cfg_wr_addr = self.input("if_sram_cfg_wr_addr", self._params.glb_addr_width) + self.if_sram_cfg_wr_data = self.input("if_sram_cfg_wr_data", self._params.axi_data_width) + self.if_sram_cfg_rd_en = self.input("if_sram_cfg_rd_en", 1) + self.if_sram_cfg_rd_addr = self.input("if_sram_cfg_rd_addr", self._params.glb_addr_width) + self.if_sram_cfg_rd_data = self.output("if_sram_cfg_rd_data", self._params.axi_data_width) + self.if_sram_cfg_rd_data_valid = self.output("if_sram_cfg_rd_data_valid", 1) + + self.cgra_cfg_jtag_gc2glb_wr_en = self.input("cgra_cfg_jtag_gc2glb_wr_en", 1) + self.cgra_cfg_jtag_gc2glb_rd_en = self.input("cgra_cfg_jtag_gc2glb_rd_en", 1) + self.cgra_cfg_jtag_gc2glb_addr = self.input("cgra_cfg_jtag_gc2glb_addr", self._params.cgra_cfg_addr_width) + self.cgra_cfg_jtag_gc2glb_data = self.input("cgra_cfg_jtag_gc2glb_data", self._params.cgra_cfg_data_width) + + self.strm_data_f2g = self.input("strm_data_f2g", self._params.cgra_data_width, size=[ + self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) + self.strm_data_valid_f2g = self.input("strm_data_valid_f2g", 1, size=[ + self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) + self.strm_data_g2f = self.output("strm_data_g2f", self._params.cgra_data_width, size=[ + self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) + self.strm_data_valid_g2f = self.output("strm_data_valid_g2f", 1, size=[ + self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) + self.strm_data_flush_g2f = self.output("strm_data_flush_g2f", 1, size=self._params.num_groups, packed=True) + + self.cgra_cfg_g2f_cfg_wr_en = self.output("cgra_cfg_g2f_cfg_wr_en", 1, size=[ + self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) + self.cgra_cfg_g2f_cfg_rd_en = self.output("cgra_cfg_g2f_cfg_rd_en", 1, size=[ + self._params.num_glb_tiles, 
self._params.cgra_per_glb], packed=True) + self.cgra_cfg_g2f_cfg_addr = self.output("cgra_cfg_g2f_cfg_addr", self._params.cgra_cfg_addr_width, size=[ + self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) + self.cgra_cfg_g2f_cfg_data = self.output("cgra_cfg_g2f_cfg_data", self._params.cgra_cfg_data_width, size=[ + self._params.num_glb_tiles, self._params.cgra_per_glb], packed=True) + + self.strm_g2f_start_pulse = self.input("strm_g2f_start_pulse", self._params.num_glb_tiles) + self.strm_f2g_start_pulse = self.input("strm_f2g_start_pulse", self._params.num_glb_tiles) + self.pcfg_start_pulse = self.input("pcfg_start_pulse", self._params.num_glb_tiles) + self.strm_f2g_interrupt_pulse = self.output("strm_f2g_interrupt_pulse", self._params.num_glb_tiles) + self.strm_g2f_interrupt_pulse = self.output("strm_g2f_interrupt_pulse", self._params.num_glb_tiles) + self.pcfg_g2f_interrupt_pulse = self.output("pcfg_g2f_interrupt_pulse", self._params.num_glb_tiles) + + # local parameters + self.bank_lsb_data_width = self._params.axi_data_width + self.bank_msb_data_width = self._params.bank_data_width - self._params.axi_data_width + + # local variables + self.data_flush = self.var("data_flush", 1, size=self._params.num_glb_tiles, packed=True) + self.proc_rd_type_e = self.enum("proc_rd_type_e", {"axi": 0, "jtag": 1}) + self.proc_rd_type = self.var("proc_rd_type", self.proc_rd_type_e) + self.proc_rd_addr_sel = self.var("proc_rd_addr_sel", 1) + self.proc_wr_en_d = self.var("proc_wr_en_d", 1) + self.proc_wr_strb_d = self.var("proc_wr_strb_d", self._params.bank_strb_width) + self.proc_wr_addr_d = self.var("proc_wr_addr_d", self._params.glb_addr_width) + self.proc_wr_data_d = self.var("proc_wr_data_d", self._params.bank_data_width) + self.proc_rd_en_d = self.var("proc_rd_en_d", 1) + self.proc_rd_addr_d = self.var("proc_rd_addr_d", self._params.glb_addr_width) + + self.sram_cfg_wr_en_d = self.var("sram_cfg_wr_en_d", 1) + self.sram_cfg_wr_strb_d = 
self.var("sram_cfg_wr_strb_d", self._params.bank_strb_width) + self.sram_cfg_wr_addr_d = self.var("sram_cfg_wr_addr_d", self._params.glb_addr_width) + self.sram_cfg_wr_data_d = self.var("sram_cfg_wr_data_d", self._params.bank_data_width) + self.sram_cfg_rd_en_d = self.var("sram_cfg_rd_en_d", 1) + self.sram_cfg_rd_addr_d = self.var("sram_cfg_rd_addr_d", self._params.glb_addr_width) + + self.cgra_cfg_jtag_gc2glb_wr_en_d = self.var("cgra_cfg_jtag_gc2glb_wr_en_d", 1) + self.cgra_cfg_jtag_gc2glb_rd_en_d = self.var("cgra_cfg_jtag_gc2glb_rd_en_d", 1) + self.cgra_cfg_jtag_gc2glb_addr_d = self.var("cgra_cfg_jtag_gc2glb_addr_d", self._params.cgra_cfg_addr_width) + self.cgra_cfg_jtag_gc2glb_data_d = self.var("cgra_cfg_jtag_gc2glb_data_d", self._params.cgra_cfg_data_width) + + self.strm_packet_e2w_esti = self.var( + "strm_packet_e2w_esti", self.header.packet_t, size=self._params.num_glb_tiles, packed=True) + self.strm_packet_w2e_wsti = self.var( + "strm_packet_w2e_wsti", self.header.packet_t, size=self._params.num_glb_tiles, packed=True) + self.strm_packet_e2w_wsto = self.var( + "strm_packet_e2w_wsto", self.header.packet_t, size=self._params.num_glb_tiles, packed=True) + self.strm_packet_w2e_esto = self.var( + "strm_packet_w2e_esto", self.header.packet_t, size=self._params.num_glb_tiles, packed=True) + + self.pcfg_packet_e2w_esti = self.var( + "pcfg_packet_e2w_esti", self.header.rd_packet_t, size=self._params.num_glb_tiles, packed=True) + self.pcfg_packet_w2e_wsti = self.var( + "pcfg_packet_w2e_wsti", self.header.rd_packet_t, size=self._params.num_glb_tiles, packed=True) + self.pcfg_packet_e2w_wsto = self.var( + "pcfg_packet_e2w_wsto", self.header.rd_packet_t, size=self._params.num_glb_tiles, packed=True) + self.pcfg_packet_w2e_esto = self.var( + "pcfg_packet_w2e_esto", self.header.rd_packet_t, size=self._params.num_glb_tiles, packed=True) + + self.cfg_tile_connected = self.var("cfg_tile_connected", self._params.num_glb_tiles + 1) + self.cfg_pcfg_tile_connected = 
self.var("cfg_pcfg_tile_connected", self._params.num_glb_tiles + 1) + self.wire(self.cfg_tile_connected[0], 0) + self.wire(self.cfg_pcfg_tile_connected[0], 0) + + self.cgra_cfg_jtag_wr_en_wsti = self.var( + "cgra_cfg_jtag_wr_en_wsti", 1, size=self._params.num_glb_tiles, packed=True) + self.cgra_cfg_jtag_rd_en_wsti = self.var( + "cgra_cfg_jtag_rd_en_wsti", 1, size=self._params.num_glb_tiles, packed=True) + self.cgra_cfg_jtag_addr_wsti = self.var( + "cgra_cfg_jtag_addr_wsti", self._params.cgra_cfg_addr_width, size=self._params.num_glb_tiles, packed=True) + self.cgra_cfg_jtag_data_wsti = self.var( + "cgra_cfg_jtag_data_wsti", self._params.cgra_cfg_data_width, size=self._params.num_glb_tiles, packed=True) + + self.cgra_cfg_jtag_wr_en_esto = self.var( + "cgra_cfg_jtag_wr_en_esto", 1, size=self._params.num_glb_tiles, packed=True) + self.cgra_cfg_jtag_rd_en_esto = self.var( + "cgra_cfg_jtag_rd_en_esto", 1, size=self._params.num_glb_tiles, packed=True) + self.cgra_cfg_jtag_addr_esto = self.var( + "cgra_cfg_jtag_addr_esto", self._params.cgra_cfg_addr_width, size=self._params.num_glb_tiles, packed=True) + self.cgra_cfg_jtag_data_esto = self.var( + "cgra_cfg_jtag_data_esto", self._params.cgra_cfg_data_width, size=self._params.num_glb_tiles, packed=True) + + self.cgra_cfg_jtag_rd_en_bypass_wsti = self.var("cgra_cfg_jtag_rd_en_bypass_wsti", 1, + size=self._params.num_glb_tiles, packed=True) + self.cgra_cfg_jtag_addr_bypass_wsti = self.var("cgra_cfg_jtag_addr_bypass_wsti", + self._params.cgra_cfg_addr_width, + size=self._params.num_glb_tiles, packed=True) + self.cgra_cfg_jtag_rd_en_bypass_esto = self.var("cgra_cfg_jtag_rd_en_bypass_esto", 1, + size=self._params.num_glb_tiles, packed=True) + self.cgra_cfg_jtag_addr_bypass_esto = self.var("cgra_cfg_jtag_addr_bypass_esto", + self._params.cgra_cfg_addr_width, + size=self._params.num_glb_tiles, packed=True) + + self.cgra_cfg_pcfg_wr_en_w2e_wsti = self.var( + "cgra_cfg_pcfg_wr_en_wsti", 1, size=self._params.num_glb_tiles, packed=True) 
+ self.cgra_cfg_pcfg_rd_en_w2e_wsti = self.var( + "cgra_cfg_pcfg_rd_en_wsti", 1, size=self._params.num_glb_tiles, packed=True) + self.cgra_cfg_pcfg_addr_w2e_wsti = self.var( + "cgra_cfg_pcfg_addr_wsti", self._params.cgra_cfg_addr_width, size=self._params.num_glb_tiles, packed=True) + self.cgra_cfg_pcfg_data_w2e_wsti = self.var( + "cgra_cfg_pcfg_data_wsti", self._params.cgra_cfg_data_width, size=self._params.num_glb_tiles, packed=True) + + self.cgra_cfg_pcfg_wr_en_w2e_esto = self.var( + "cgra_cfg_pcfg_wr_en_esto", 1, size=self._params.num_glb_tiles, packed=True) + self.cgra_cfg_pcfg_rd_en_w2e_esto = self.var( + "cgra_cfg_pcfg_rd_en_esto", 1, size=self._params.num_glb_tiles, packed=True) + self.cgra_cfg_pcfg_addr_w2e_esto = self.var( + "cgra_cfg_pcfg_addr_esto", self._params.cgra_cfg_addr_width, size=self._params.num_glb_tiles, packed=True) + self.cgra_cfg_pcfg_data_w2e_esto = self.var( + "cgra_cfg_pcfg_data_esto", self._params.cgra_cfg_data_width, size=self._params.num_glb_tiles, packed=True) + + self.cgra_cfg_pcfg_wr_en_e2w_esti = self.var( + "cgra_cfg_pcfg_wr_en_esti", 1, size=self._params.num_glb_tiles, packed=True) + self.cgra_cfg_pcfg_rd_en_e2w_esti = self.var( + "cgra_cfg_pcfg_rd_en_esti", 1, size=self._params.num_glb_tiles, packed=True) + self.cgra_cfg_pcfg_addr_e2w_esti = self.var( + "cgra_cfg_pcfg_addr_esti", self._params.cgra_cfg_addr_width, size=self._params.num_glb_tiles, packed=True) + self.cgra_cfg_pcfg_data_e2w_esti = self.var( + "cgra_cfg_pcfg_data_esti", self._params.cgra_cfg_data_width, size=self._params.num_glb_tiles, packed=True) + + self.cgra_cfg_pcfg_wr_en_e2w_wsto = self.var( + "cgra_cfg_pcfg_wr_en_wsto", 1, size=self._params.num_glb_tiles, packed=True) + self.cgra_cfg_pcfg_rd_en_e2w_wsto = self.var( + "cgra_cfg_pcfg_rd_en_wsto", 1, size=self._params.num_glb_tiles, packed=True) + self.cgra_cfg_pcfg_addr_e2w_wsto = self.var( + "cgra_cfg_pcfg_addr_wsto", self._params.cgra_cfg_addr_width, size=self._params.num_glb_tiles, packed=True) + 
self.cgra_cfg_pcfg_data_e2w_wsto = self.var( + "cgra_cfg_pcfg_data_wsto", self._params.cgra_cfg_data_width, size=self._params.num_glb_tiles, packed=True) + + self.strm_f2g_interrupt_pulse_w = self.var("strm_f2g_interrupt_pulse_w", self._params.num_glb_tiles) + self.strm_f2g_interrupt_pulse_d = self.var("strm_f2g_interrupt_pulse_d", self._params.num_glb_tiles) + self.wire(self.strm_f2g_interrupt_pulse_d, self.strm_f2g_interrupt_pulse) + + self.strm_g2f_interrupt_pulse_w = self.var("strm_g2f_interrupt_pulse_w", self._params.num_glb_tiles) + self.strm_g2f_interrupt_pulse_d = self.var("strm_g2f_interrupt_pulse_d", self._params.num_glb_tiles) + self.wire(self.strm_g2f_interrupt_pulse_d, self.strm_g2f_interrupt_pulse) + + self.pcfg_g2f_interrupt_pulse_w = self.var("pcfg_g2f_interrupt_pulse_w", self._params.num_glb_tiles) + self.pcfg_g2f_interrupt_pulse_d = self.var("pcfg_g2f_interrupt_pulse_d", self._params.num_glb_tiles) + self.wire(self.pcfg_g2f_interrupt_pulse_d, self.pcfg_g2f_interrupt_pulse) + + # interface + if_proc_tile2tile = GlbTileInterface(addr_width=self._params.glb_addr_width, + data_width=self._params.bank_data_width, is_clk_en=True, is_strb=True) + if_cfg_tile2tile = GlbTileInterface(addr_width=self._params.axi_addr_width, + data_width=self._params.axi_data_width, is_clk_en=True, is_strb=False) + if_sram_cfg_tile2tile = GlbTileInterface(addr_width=self._params.glb_addr_width, + data_width=self._params.axi_data_width, is_clk_en=True, is_strb=False) + + self.if_proc_list = [] + self.if_cfg_list = [] + self.if_sram_cfg_list = [] + for i in range(self._params.num_glb_tiles + 1): + self.if_proc_list.append(self.interface( + if_proc_tile2tile, f"if_proc_tile2tile_{i}")) + self.if_cfg_list.append(self.interface( + if_cfg_tile2tile, f"if_cfg_tile2tile_{i}")) + self.if_sram_cfg_list.append(self.interface( + if_sram_cfg_tile2tile, f"if_sram_cfg_tile2tile_{i}")) + + # GLS pipeline + self.strm_g2f_start_pulse_d = self.var("strm_g2f_start_pulse_d", 
self._params.num_glb_tiles) + self.strm_f2g_start_pulse_d = self.var("strm_f2g_start_pulse_d", self._params.num_glb_tiles) + self.pcfg_start_pulse_d = self.var("pcfg_start_pulse_d", self._params.num_glb_tiles) + self.gls_in = concat(self.strm_g2f_start_pulse, self.strm_f2g_start_pulse, self.pcfg_start_pulse) + self.gls_out = concat(self.strm_g2f_start_pulse_d, self.strm_f2g_start_pulse_d, self.pcfg_start_pulse_d) + + self.gls_pipeline = Pipeline(width=self.gls_in.width, depth=self._params.gls_pipeline_depth) + self.add_child("gls_pipeline", + self.gls_pipeline, + clk=self.clk, + clk_en=const(1, 1), + reset=self.reset, + in_=self.gls_in, + out_=self.gls_out) + + # GLB Tiles + self.glb_tile = [] + for i in range(self._params.num_glb_tiles): + self.glb_tile.append(GlbTile(_params=self._params)) + + self.wire(self.if_proc_list[-1].rd_data, 0) + self.wire(self.if_proc_list[-1].rd_data_valid, 0) + self.wire(self.if_cfg_list[-1].rd_data, 0) + self.wire(self.if_cfg_list[-1].rd_data_valid, 0) + self.wire(self.if_sram_cfg_list[-1].rd_data, 0) + self.wire(self.if_sram_cfg_list[-1].rd_data_valid, 0) + + self.add_glb_tile() + self.add_always(self.proc_pipeline) + self.add_always(self.sram_cfg_pipeline) + self.add_always(self.left_edge_proc_wr_ff) + self.add_always(self.left_edge_proc_rd_ff) + self.add_always(self.left_edge_proc_rd_out) + self.add_proc_clk_en() + self.add_always(self.left_edge_cfg_ff) + self.add_always(self.left_edge_cgra_cfg_ff) + self.tile2tile_e2w_struct_wiring() + self.tile2tile_w2e_struct_wiring() + self.add_always(self.tile2tile_w2e_cfg_wiring) + self.add_always(self.tile2tile_e2w_cfg_wiring) + self.add_always(self.interrupt_pipeline) + + # Directly assign rd_data output ports at the left side + self.wire(self.if_cfg_rd_data, self.if_cfg_list[0].rd_data) + self.wire(self.if_cfg_rd_data_valid, self.if_cfg_list[0].rd_data_valid) + + # Add flush signal crossbar + self.flush_crossbar = GlbCrossbar(width=1, num_input=self._params.num_glb_tiles, + 
num_output=self._params.num_groups) + self.flush_crossbar_sel_w = self.var("flush_crossbar_sel_w", clog2(self._params.num_glb_tiles), + size=self._params.num_groups, packed=True) + for i in range(self._params.num_groups): + self.wire(self.flush_crossbar_sel_w[i], + self.flush_crossbar_sel[(i + 1) * clog2(self._params.num_glb_tiles) - 1, + i * clog2(self._params.num_glb_tiles)]) + self.add_child("flush_crossbar", + self.flush_crossbar, + in_=self.data_flush, + sel_=self.flush_crossbar_sel_w, + out_=self.strm_data_flush_g2f) + + @ always_ff((posedge, "clk"), (posedge, "reset")) + def proc_pipeline(self): + if self.reset: + self.proc_wr_en_d = 0 + self.proc_wr_strb_d = 0 + self.proc_wr_addr_d = 0 + self.proc_wr_data_d = 0 + self.proc_rd_en_d = 0 + self.proc_rd_addr_d = 0 + else: + self.proc_wr_en_d = self.proc_wr_en + self.proc_wr_strb_d = self.proc_wr_strb + self.proc_wr_addr_d = self.proc_wr_addr + self.proc_wr_data_d = self.proc_wr_data + self.proc_rd_en_d = self.proc_rd_en + self.proc_rd_addr_d = self.proc_rd_addr + + @ always_ff((posedge, "clk"), (posedge, "reset")) + def sram_cfg_pipeline(self): + if self.reset: + self.sram_cfg_wr_en_d = 0 + self.sram_cfg_wr_strb_d = 0 + self.sram_cfg_wr_addr_d = 0 + self.sram_cfg_wr_data_d = 0 + self.sram_cfg_rd_en_d = 0 + self.sram_cfg_rd_addr_d = 0 + else: + self.sram_cfg_wr_en_d = self.if_sram_cfg_wr_en + self.sram_cfg_wr_addr_d = self.if_sram_cfg_wr_addr + if self.if_sram_cfg_wr_addr[self._params.bank_byte_offset - 1] == 0: + self.sram_cfg_wr_data_d = concat(const(0, self.bank_msb_data_width), self.if_sram_cfg_wr_data) + self.sram_cfg_wr_strb_d = concat(const(0, self.bank_msb_data_width // 8), + const(2**(self.bank_lsb_data_width // 8) - 1, + self.bank_lsb_data_width // 8)) + else: + self.sram_cfg_wr_data_d = concat( + self.if_sram_cfg_wr_data[self.bank_msb_data_width - 1, 0], const(0, self.bank_lsb_data_width)) + self.sram_cfg_wr_strb_d = concat(const(2**(self.bank_msb_data_width // 8) - 1, + self.bank_msb_data_width // 
8), + const(0, self.bank_lsb_data_width // 8)) + self.sram_cfg_rd_en_d = self.if_sram_cfg_rd_en + self.sram_cfg_rd_addr_d = self.if_sram_cfg_rd_addr + + def add_proc_clk_en(self): + self.wr_clk_en_gen = GlbClkEnGen(cnt=self._params.tile2sram_wr_delay + self._params.proc_clk_en_margin) + + # Unused by amber. Right? + # self.wr_clk_en_gen.p_cnt.value = self._params.tile2sram_wr_delay + self._params.proc_clk_en_margin + + self.proc_wr_clk_en = self.var("proc_wr_clk_en", 1) + self.add_child("proc_wr_clk_en_gen", + self.wr_clk_en_gen, + clk=self.clk, + reset=self.reset, + enable=self.proc_wr_en_d | self.sram_cfg_wr_en_d, + clk_en=self.proc_wr_clk_en + ) + self.wire(self.if_proc_list[0].wr_clk_en, self.proc_wr_clk_en) + self.rd_clk_en_gen = GlbClkEnGen(cnt=2 * self._params.num_glb_tiles + + self._params.tile2sram_rd_delay + self._params.proc_clk_en_margin) + self.proc_rd_clk_en = self.var("proc_rd_clk_en", 1) + self.add_child("proc_rd_clk_en_gen", + self.rd_clk_en_gen, + clk=self.clk, + reset=self.reset, + enable=self.proc_rd_en_d | self.sram_cfg_rd_en_d, + clk_en=self.proc_rd_clk_en + ) + self.wire(self.if_proc_list[0].rd_clk_en, self.proc_rd_clk_en) + + @ always_ff((posedge, "clk"), (posedge, "reset")) + def left_edge_proc_wr_ff(self): + if self.reset: + self.if_proc_list[0].wr_en = 0 + self.if_proc_list[0].wr_strb = 0 + self.if_proc_list[0].wr_addr = 0 + self.if_proc_list[0].wr_data = 0 + else: + if self.proc_wr_en_d: + self.if_proc_list[0].wr_en = self.proc_wr_en_d + self.if_proc_list[0].wr_strb = self.proc_wr_strb_d + self.if_proc_list[0].wr_addr = self.proc_wr_addr_d + self.if_proc_list[0].wr_data = self.proc_wr_data_d + elif self.sram_cfg_wr_en_d: + self.if_proc_list[0].wr_en = self.sram_cfg_wr_en_d + self.if_proc_list[0].wr_strb = self.sram_cfg_wr_strb_d + self.if_proc_list[0].wr_addr = self.sram_cfg_wr_addr_d + self.if_proc_list[0].wr_data = self.sram_cfg_wr_data_d + else: + self.if_proc_list[0].wr_en = self.proc_wr_en_d + self.if_proc_list[0].wr_strb = 
self.proc_wr_strb_d + self.if_proc_list[0].wr_addr = self.proc_wr_addr_d + self.if_proc_list[0].wr_data = self.proc_wr_data_d + + @ always_ff((posedge, "clk"), (posedge, "reset")) + def left_edge_proc_rd_ff(self): + if self.reset: + self.if_proc_list[0].rd_en = 0 + self.if_proc_list[0].rd_addr = 0 + self.proc_rd_type = self.proc_rd_type_e.axi + self.proc_rd_addr_sel = 0 + else: + if self.proc_rd_en_d: + self.if_proc_list[0].rd_en = self.proc_rd_en_d + self.if_proc_list[0].rd_addr = self.proc_rd_addr_d + self.proc_rd_type = self.proc_rd_type_e.axi + self.proc_rd_addr_sel = 0 + elif self.sram_cfg_rd_en_d: + self.if_proc_list[0].rd_en = self.sram_cfg_rd_en_d + self.if_proc_list[0].rd_addr = self.sram_cfg_rd_addr_d + self.proc_rd_addr_sel = self.sram_cfg_rd_addr_d[self._params.bank_byte_offset - 1] + self.proc_rd_type = self.proc_rd_type_e.jtag + else: + self.if_proc_list[0].rd_en = self.proc_rd_en_d + self.if_proc_list[0].rd_addr = self.proc_rd_addr_d + self.proc_rd_type = self.proc_rd_type + self.proc_rd_addr_sel = self.proc_rd_addr_sel + + @ always_comb + def left_edge_proc_rd_out(self): + if self.proc_rd_type == self.proc_rd_type_e.axi: + self.proc_rd_data = self.if_proc_list[0].rd_data + self.proc_rd_data_valid = self.if_proc_list[0].rd_data_valid + self.if_sram_cfg_rd_data = 0 + self.if_sram_cfg_rd_data_valid = 0 + elif self.proc_rd_type == self.proc_rd_type_e.jtag: + self.proc_rd_data = 0 + self.proc_rd_data_valid = 0 + if self.proc_rd_addr_sel == 0: + self.if_sram_cfg_rd_data = self.if_proc_list[0].rd_data[self._params.axi_data_width - 1, 0] + else: + self.if_sram_cfg_rd_data = self.if_proc_list[0].rd_data[self._params.axi_data_width + * 2 - 1, self._params.axi_data_width] + self.if_sram_cfg_rd_data_valid = self.if_proc_list[0].rd_data_valid + else: + self.proc_rd_data = self.if_proc_list[0].rd_data + self.proc_rd_data_valid = self.if_proc_list[0].rd_data_valid + self.if_sram_cfg_rd_data = 0 + self.if_sram_cfg_rd_data_valid = 0 + + @ always_ff((posedge, "clk"), 
(posedge, "reset")) + def left_edge_cfg_ff(self): + if self.reset: + self.if_cfg_list[0].wr_en = 0 + self.if_cfg_list[0].wr_clk_en = 0 + self.if_cfg_list[0].wr_addr = 0 + self.if_cfg_list[0].wr_data = 0 + self.if_cfg_list[0].rd_en = 0 + self.if_cfg_list[0].rd_clk_en = 0 + self.if_cfg_list[0].rd_addr = 0 + else: + self.if_cfg_list[0].wr_en = self.if_cfg_wr_en + self.if_cfg_list[0].wr_clk_en = self.if_cfg_wr_clk_en + self.if_cfg_list[0].wr_addr = self.if_cfg_wr_addr + self.if_cfg_list[0].wr_data = self.if_cfg_wr_data + self.if_cfg_list[0].rd_en = self.if_cfg_rd_en + self.if_cfg_list[0].rd_clk_en = self.if_cfg_rd_clk_en + self.if_cfg_list[0].rd_addr = self.if_cfg_rd_addr + + @ always_ff((posedge, "clk"), (posedge, "reset")) + def left_edge_cgra_cfg_ff(self): + if self.reset: + self.cgra_cfg_jtag_gc2glb_wr_en_d = 0 + self.cgra_cfg_jtag_gc2glb_rd_en_d = 0 + self.cgra_cfg_jtag_gc2glb_addr_d = 0 + self.cgra_cfg_jtag_gc2glb_data_d = 0 + else: + self.cgra_cfg_jtag_gc2glb_wr_en_d = self.cgra_cfg_jtag_gc2glb_wr_en + self.cgra_cfg_jtag_gc2glb_rd_en_d = self.cgra_cfg_jtag_gc2glb_rd_en + self.cgra_cfg_jtag_gc2glb_addr_d = self.cgra_cfg_jtag_gc2glb_addr + self.cgra_cfg_jtag_gc2glb_data_d = self.cgra_cfg_jtag_gc2glb_data + + # NOTE: Kratos limitation. 
Struct wiring cannot be easily done using always_comb + def tile2tile_e2w_struct_wiring(self): + self.wire(self.strm_packet_e2w_esti[self._params.num_glb_tiles - 1], 0) + self.wire(self.pcfg_packet_e2w_esti[self._params.num_glb_tiles - 1], 0) + for i in range(self._params.num_glb_tiles - 1): + self.wire(self.strm_packet_e2w_esti[i], self.strm_packet_e2w_wsto[i + 1]) + self.wire(self.pcfg_packet_e2w_esti[i], self.pcfg_packet_e2w_wsto[i + 1]) + + def tile2tile_w2e_struct_wiring(self): + self.wire(self.strm_packet_w2e_wsti[0], 0) + self.wire(self.pcfg_packet_w2e_wsti[0], 0) + for i in range(1, self._params.num_glb_tiles): + self.wire(self.strm_packet_w2e_wsti[i], self.strm_packet_w2e_esto[i - 1]) + self.wire(self.pcfg_packet_w2e_wsti[i], self.pcfg_packet_w2e_esto[i - 1]) + + @ always_comb + def tile2tile_e2w_cfg_wiring(self): + for i in range(self._params.num_glb_tiles): + if i == self._params.num_glb_tiles - 1: + self.cgra_cfg_pcfg_rd_en_e2w_esti[i] = 0 + self.cgra_cfg_pcfg_wr_en_e2w_esti[i] = 0 + self.cgra_cfg_pcfg_addr_e2w_esti[i] = 0 + self.cgra_cfg_pcfg_data_e2w_esti[i] = 0 + else: + self.cgra_cfg_pcfg_rd_en_e2w_esti[i] = self.cgra_cfg_pcfg_rd_en_e2w_wsto[i + 1] + self.cgra_cfg_pcfg_wr_en_e2w_esti[i] = self.cgra_cfg_pcfg_wr_en_e2w_wsto[i + 1] + self.cgra_cfg_pcfg_addr_e2w_esti[i] = self.cgra_cfg_pcfg_addr_e2w_wsto[i + 1] + self.cgra_cfg_pcfg_data_e2w_esti[i] = self.cgra_cfg_pcfg_data_e2w_wsto[i + 1] + + @ always_comb + def tile2tile_w2e_cfg_wiring(self): + for i in range(0, self._params.num_glb_tiles): + if i == 0: + self.cgra_cfg_jtag_rd_en_wsti[i] = 0 + self.cgra_cfg_jtag_wr_en_wsti[i] = self.cgra_cfg_jtag_gc2glb_wr_en_d + self.cgra_cfg_jtag_addr_wsti[i] = self.cgra_cfg_jtag_gc2glb_addr_d + self.cgra_cfg_jtag_data_wsti[i] = self.cgra_cfg_jtag_gc2glb_data_d + + self.cgra_cfg_jtag_rd_en_bypass_wsti[i] = self.cgra_cfg_jtag_gc2glb_rd_en_d + self.cgra_cfg_jtag_addr_bypass_wsti[i] = self.cgra_cfg_jtag_gc2glb_addr_d + + self.cgra_cfg_pcfg_rd_en_w2e_wsti[i] = 0 + 
self.cgra_cfg_pcfg_wr_en_w2e_wsti[i] = 0 + self.cgra_cfg_pcfg_addr_w2e_wsti[i] = 0 + self.cgra_cfg_pcfg_data_w2e_wsti[i] = 0 + else: + self.cgra_cfg_jtag_rd_en_wsti[i] = self.cgra_cfg_jtag_rd_en_esto[i - 1] + self.cgra_cfg_jtag_wr_en_wsti[i] = self.cgra_cfg_jtag_wr_en_esto[i - 1] + self.cgra_cfg_jtag_addr_wsti[i] = self.cgra_cfg_jtag_addr_esto[i - 1] + self.cgra_cfg_jtag_data_wsti[i] = self.cgra_cfg_jtag_data_esto[i - 1] + + self.cgra_cfg_jtag_rd_en_bypass_wsti[i] = self.cgra_cfg_jtag_rd_en_bypass_esto[i - 1] + self.cgra_cfg_jtag_addr_bypass_wsti[i] = self.cgra_cfg_jtag_addr_bypass_esto[i - 1] + + self.cgra_cfg_pcfg_rd_en_w2e_wsti[i] = self.cgra_cfg_pcfg_rd_en_w2e_esto[i - 1] + self.cgra_cfg_pcfg_wr_en_w2e_wsti[i] = self.cgra_cfg_pcfg_wr_en_w2e_esto[i - 1] + self.cgra_cfg_pcfg_addr_w2e_wsti[i] = self.cgra_cfg_pcfg_addr_w2e_esto[i - 1] + self.cgra_cfg_pcfg_data_w2e_wsti[i] = self.cgra_cfg_pcfg_data_w2e_esto[i - 1] + + def add_glb_tile(self): + for i in range(self._params.num_glb_tiles): + self.add_child(f"glb_tile_gen_{i}", + self.glb_tile[i], + clk=self.clk, + clk_en_pcfg_broadcast=clock_en(~self.pcfg_broadcast_stall[i]), + clk_en_master=clock_en(self.glb_clk_en_master[i]), + clk_en_bank_master=clock_en(self.glb_clk_en_bank_master[i]), + reset=self.reset, + glb_tile_id=i, + + # proc + if_proc_est_m_wr_en=self.if_proc_list[i + 1].wr_en, + if_proc_est_m_wr_clk_en=self.if_proc_list[i + 1].wr_clk_en, + if_proc_est_m_wr_addr=self.if_proc_list[i + 1].wr_addr, + if_proc_est_m_wr_data=self.if_proc_list[i + 1].wr_data, + if_proc_est_m_wr_strb=self.if_proc_list[i + 1].wr_strb, + if_proc_est_m_rd_en=self.if_proc_list[i + 1].rd_en, + if_proc_est_m_rd_clk_en=self.if_proc_list[i + 1].rd_clk_en, + if_proc_est_m_rd_addr=self.if_proc_list[i + 1].rd_addr, + if_proc_est_m_rd_data=self.if_proc_list[i + 1].rd_data, + if_proc_est_m_rd_data_valid=self.if_proc_list[i + 1].rd_data_valid, + + if_proc_wst_s_wr_en=self.if_proc_list[i].wr_en, + 
if_proc_wst_s_wr_clk_en=self.if_proc_list[i].wr_clk_en, + if_proc_wst_s_wr_addr=self.if_proc_list[i].wr_addr, + if_proc_wst_s_wr_data=self.if_proc_list[i].wr_data, + if_proc_wst_s_wr_strb=self.if_proc_list[i].wr_strb, + if_proc_wst_s_rd_en=self.if_proc_list[i].rd_en, + if_proc_wst_s_rd_clk_en=self.if_proc_list[i].rd_clk_en, + if_proc_wst_s_rd_addr=self.if_proc_list[i].rd_addr, + if_proc_wst_s_rd_data=self.if_proc_list[i].rd_data, + if_proc_wst_s_rd_data_valid=self.if_proc_list[i].rd_data_valid, + + # strm + strm_wr_en_e2w_esti=self.strm_packet_e2w_esti[i]["wr"]['wr_en'], + strm_wr_strb_e2w_esti=self.strm_packet_e2w_esti[i]["wr"]['wr_strb'], + strm_wr_addr_e2w_esti=self.strm_packet_e2w_esti[i]["wr"]['wr_addr'], + strm_wr_data_e2w_esti=self.strm_packet_e2w_esti[i]["wr"]['wr_data'], + strm_rd_en_e2w_esti=self.strm_packet_e2w_esti[i]["rdrq"]['rd_en'], + strm_rd_addr_e2w_esti=self.strm_packet_e2w_esti[i]["rdrq"]['rd_addr'], + strm_rd_data_e2w_esti=self.strm_packet_e2w_esti[i]["rdrs"]['rd_data'], + strm_rd_data_valid_e2w_esti=self.strm_packet_e2w_esti[i]["rdrs"]['rd_data_valid'], + + strm_wr_en_w2e_esto=self.strm_packet_w2e_esto[i]["wr"]['wr_en'], + strm_wr_strb_w2e_esto=self.strm_packet_w2e_esto[i]["wr"]['wr_strb'], + strm_wr_addr_w2e_esto=self.strm_packet_w2e_esto[i]["wr"]['wr_addr'], + strm_wr_data_w2e_esto=self.strm_packet_w2e_esto[i]["wr"]['wr_data'], + strm_rd_en_w2e_esto=self.strm_packet_w2e_esto[i]["rdrq"]['rd_en'], + strm_rd_addr_w2e_esto=self.strm_packet_w2e_esto[i]["rdrq"]['rd_addr'], + strm_rd_data_w2e_esto=self.strm_packet_w2e_esto[i]["rdrs"]['rd_data'], + strm_rd_data_valid_w2e_esto=self.strm_packet_w2e_esto[i]["rdrs"]['rd_data_valid'], + + strm_wr_en_w2e_wsti=self.strm_packet_w2e_wsti[i]["wr"]['wr_en'], + strm_wr_strb_w2e_wsti=self.strm_packet_w2e_wsti[i]["wr"]['wr_strb'], + strm_wr_addr_w2e_wsti=self.strm_packet_w2e_wsti[i]["wr"]['wr_addr'], + strm_wr_data_w2e_wsti=self.strm_packet_w2e_wsti[i]["wr"]['wr_data'], + 
strm_rd_en_w2e_wsti=self.strm_packet_w2e_wsti[i]["rdrq"]['rd_en'], + strm_rd_addr_w2e_wsti=self.strm_packet_w2e_wsti[i]["rdrq"]['rd_addr'], + strm_rd_data_w2e_wsti=self.strm_packet_w2e_wsti[i]["rdrs"]['rd_data'], + strm_rd_data_valid_w2e_wsti=self.strm_packet_w2e_wsti[i]["rdrs"]['rd_data_valid'], + + strm_wr_en_e2w_wsto=self.strm_packet_e2w_wsto[i]["wr"]['wr_en'], + strm_wr_strb_e2w_wsto=self.strm_packet_e2w_wsto[i]["wr"]['wr_strb'], + strm_wr_addr_e2w_wsto=self.strm_packet_e2w_wsto[i]["wr"]['wr_addr'], + strm_wr_data_e2w_wsto=self.strm_packet_e2w_wsto[i]["wr"]['wr_data'], + strm_rd_en_e2w_wsto=self.strm_packet_e2w_wsto[i]["rdrq"]['rd_en'], + strm_rd_addr_e2w_wsto=self.strm_packet_e2w_wsto[i]["rdrq"]['rd_addr'], + strm_rd_data_e2w_wsto=self.strm_packet_e2w_wsto[i]["rdrs"]['rd_data'], + strm_rd_data_valid_e2w_wsto=self.strm_packet_e2w_wsto[i]["rdrs"]['rd_data_valid'], + + # pcfg + pcfg_rd_en_e2w_esti=self.pcfg_packet_e2w_esti[i]["rdrq"]['rd_en'], + pcfg_rd_addr_e2w_esti=self.pcfg_packet_e2w_esti[i]["rdrq"]['rd_addr'], + pcfg_rd_data_e2w_esti=self.pcfg_packet_e2w_esti[i]["rdrs"]['rd_data'], + pcfg_rd_data_valid_e2w_esti=self.pcfg_packet_e2w_esti[i]["rdrs"]['rd_data_valid'], + + pcfg_rd_en_w2e_esto=self.pcfg_packet_w2e_esto[i]["rdrq"]['rd_en'], + pcfg_rd_addr_w2e_esto=self.pcfg_packet_w2e_esto[i]["rdrq"]['rd_addr'], + pcfg_rd_data_w2e_esto=self.pcfg_packet_w2e_esto[i]["rdrs"]['rd_data'], + pcfg_rd_data_valid_w2e_esto=self.pcfg_packet_w2e_esto[i]["rdrs"]['rd_data_valid'], + + pcfg_rd_en_w2e_wsti=self.pcfg_packet_w2e_wsti[i]["rdrq"]['rd_en'], + pcfg_rd_addr_w2e_wsti=self.pcfg_packet_w2e_wsti[i]["rdrq"]['rd_addr'], + pcfg_rd_data_w2e_wsti=self.pcfg_packet_w2e_wsti[i]["rdrs"]['rd_data'], + pcfg_rd_data_valid_w2e_wsti=self.pcfg_packet_w2e_wsti[i]["rdrs"]['rd_data_valid'], + + pcfg_rd_en_e2w_wsto=self.pcfg_packet_e2w_wsto[i]["rdrq"]['rd_en'], + pcfg_rd_addr_e2w_wsto=self.pcfg_packet_e2w_wsto[i]["rdrq"]['rd_addr'], + 
pcfg_rd_data_e2w_wsto=self.pcfg_packet_e2w_wsto[i]["rdrs"]['rd_data'], + pcfg_rd_data_valid_e2w_wsto=self.pcfg_packet_e2w_wsto[i]["rdrs"]['rd_data_valid'], + + # cfg + if_cfg_est_m_wr_en=self.if_cfg_list[i + 1].wr_en, + if_cfg_est_m_wr_clk_en=self.if_cfg_list[i + 1].wr_clk_en, + if_cfg_est_m_wr_addr=self.if_cfg_list[i + 1].wr_addr, + if_cfg_est_m_wr_data=self.if_cfg_list[i + 1].wr_data, + if_cfg_est_m_rd_en=self.if_cfg_list[i + 1].rd_en, + if_cfg_est_m_rd_clk_en=self.if_cfg_list[i + 1].rd_clk_en, + if_cfg_est_m_rd_addr=self.if_cfg_list[i + 1].rd_addr, + if_cfg_est_m_rd_data=self.if_cfg_list[i + 1].rd_data, + if_cfg_est_m_rd_data_valid=self.if_cfg_list[i + 1].rd_data_valid, + + if_cfg_wst_s_wr_en=self.if_cfg_list[i].wr_en, + if_cfg_wst_s_wr_clk_en=self.if_cfg_list[i].wr_clk_en, + if_cfg_wst_s_wr_addr=self.if_cfg_list[i].wr_addr, + if_cfg_wst_s_wr_data=self.if_cfg_list[i].wr_data, + if_cfg_wst_s_rd_en=self.if_cfg_list[i].rd_en, + if_cfg_wst_s_rd_clk_en=self.if_cfg_list[i].rd_clk_en, + if_cfg_wst_s_rd_addr=self.if_cfg_list[i].rd_addr, + if_cfg_wst_s_rd_data=self.if_cfg_list[i].rd_data, + if_cfg_wst_s_rd_data_valid=self.if_cfg_list[i].rd_data_valid, + + cfg_tile_connected_wsti=self.cfg_tile_connected[i], + cfg_tile_connected_esto=self.cfg_tile_connected[i + 1], + cfg_pcfg_tile_connected_wsti=self.cfg_pcfg_tile_connected[i], + cfg_pcfg_tile_connected_esto=self.cfg_pcfg_tile_connected[i + 1], + + strm_data_f2g=self.strm_data_f2g[i], + strm_data_valid_f2g=self.strm_data_valid_f2g[i], + strm_data_g2f=self.strm_data_g2f[i], + strm_data_valid_g2f=self.strm_data_valid_g2f[i], + data_flush=self.data_flush[i], + + cgra_cfg_g2f_cfg_wr_en=self.cgra_cfg_g2f_cfg_wr_en[i], + cgra_cfg_g2f_cfg_rd_en=self.cgra_cfg_g2f_cfg_rd_en[i], + cgra_cfg_g2f_cfg_addr=self.cgra_cfg_g2f_cfg_addr[i], + cgra_cfg_g2f_cfg_data=self.cgra_cfg_g2f_cfg_data[i], + + cgra_cfg_pcfg_wr_en_w2e_wsti=self.cgra_cfg_pcfg_wr_en_w2e_wsti[i], + cgra_cfg_pcfg_rd_en_w2e_wsti=self.cgra_cfg_pcfg_rd_en_w2e_wsti[i], + 
cgra_cfg_pcfg_addr_w2e_wsti=self.cgra_cfg_pcfg_addr_w2e_wsti[i], + cgra_cfg_pcfg_data_w2e_wsti=self.cgra_cfg_pcfg_data_w2e_wsti[i], + + cgra_cfg_pcfg_wr_en_w2e_esto=self.cgra_cfg_pcfg_wr_en_w2e_esto[i], + cgra_cfg_pcfg_rd_en_w2e_esto=self.cgra_cfg_pcfg_rd_en_w2e_esto[i], + cgra_cfg_pcfg_addr_w2e_esto=self.cgra_cfg_pcfg_addr_w2e_esto[i], + cgra_cfg_pcfg_data_w2e_esto=self.cgra_cfg_pcfg_data_w2e_esto[i], + + cgra_cfg_pcfg_wr_en_e2w_esti=self.cgra_cfg_pcfg_wr_en_e2w_esti[i], + cgra_cfg_pcfg_rd_en_e2w_esti=self.cgra_cfg_pcfg_rd_en_e2w_esti[i], + cgra_cfg_pcfg_addr_e2w_esti=self.cgra_cfg_pcfg_addr_e2w_esti[i], + cgra_cfg_pcfg_data_e2w_esti=self.cgra_cfg_pcfg_data_e2w_esti[i], + + cgra_cfg_pcfg_wr_en_e2w_wsto=self.cgra_cfg_pcfg_wr_en_e2w_wsto[i], + cgra_cfg_pcfg_rd_en_e2w_wsto=self.cgra_cfg_pcfg_rd_en_e2w_wsto[i], + cgra_cfg_pcfg_addr_e2w_wsto=self.cgra_cfg_pcfg_addr_e2w_wsto[i], + cgra_cfg_pcfg_data_e2w_wsto=self.cgra_cfg_pcfg_data_e2w_wsto[i], + + cgra_cfg_jtag_wr_en_wsti=self.cgra_cfg_jtag_wr_en_wsti[i], + cgra_cfg_jtag_rd_en_wsti=self.cgra_cfg_jtag_rd_en_wsti[i], + cgra_cfg_jtag_addr_wsti=self.cgra_cfg_jtag_addr_wsti[i], + cgra_cfg_jtag_data_wsti=self.cgra_cfg_jtag_data_wsti[i], + + cgra_cfg_jtag_wr_en_esto=self.cgra_cfg_jtag_wr_en_esto[i], + cgra_cfg_jtag_rd_en_esto=self.cgra_cfg_jtag_rd_en_esto[i], + cgra_cfg_jtag_addr_esto=self.cgra_cfg_jtag_addr_esto[i], + cgra_cfg_jtag_data_esto=self.cgra_cfg_jtag_data_esto[i], + + cgra_cfg_jtag_rd_en_bypass_wsti=self.cgra_cfg_jtag_rd_en_bypass_wsti[i], + cgra_cfg_jtag_addr_bypass_wsti=self.cgra_cfg_jtag_addr_bypass_wsti[i], + cgra_cfg_jtag_rd_en_bypass_esto=self.cgra_cfg_jtag_rd_en_bypass_esto[i], + cgra_cfg_jtag_addr_bypass_esto=self.cgra_cfg_jtag_addr_bypass_esto[i], + + strm_g2f_start_pulse=self.strm_g2f_start_pulse_d[i], + strm_f2g_start_pulse=self.strm_f2g_start_pulse_d[i], + pcfg_start_pulse=self.pcfg_start_pulse_d[i], + strm_f2g_interrupt_pulse=self.strm_f2g_interrupt_pulse_w[i], + 
strm_g2f_interrupt_pulse=self.strm_g2f_interrupt_pulse_w[i], + pcfg_g2f_interrupt_pulse=self.pcfg_g2f_interrupt_pulse_w[i]) + + @ always_ff((posedge, "clk"), (posedge, "reset")) + def interrupt_pipeline(self): + if self.reset: + for i in range(self._params.num_glb_tiles): + self.strm_f2g_interrupt_pulse_d[i] = 0 + self.strm_g2f_interrupt_pulse_d[i] = 0 + self.pcfg_g2f_interrupt_pulse_d[i] = 0 + else: + for i in range(self._params.num_glb_tiles): + self.strm_f2g_interrupt_pulse_d[i] = self.strm_f2g_interrupt_pulse_w[i] + self.strm_g2f_interrupt_pulse_d[i] = self.strm_g2f_interrupt_pulse_w[i] + self.pcfg_g2f_interrupt_pulse_d[i] = self.pcfg_g2f_interrupt_pulse_w[i] + + +def GlobalBufferMagma(params: GlobalBufferParams): + dut = GlobalBuffer(params) + circ = to_magma(dut, flatten_array=True) + + return FromMagma(circ) diff --git a/global_buffer/global_buffer_main.py b/global_buffer/global_buffer_main.py index a3d4421e26..2750ccf1ba 100644 --- a/global_buffer/global_buffer_main.py +++ b/global_buffer/global_buffer_main.py @@ -1,14 +1,11 @@ import argparse - +import os if os.getenv('WHICH_SOC') == "amber": from global_buffer.design_amber.global_buffer import GlobalBuffer else: from global_buffer.design.global_buffer import GlobalBuffer - - from global_buffer.design.global_buffer_parameter import gen_global_buffer_params, gen_header_files from systemRDL.util import gen_rdl_header -import os import pathlib import kratos as k From b00523fe86cc9fd93a940267bcb14b6f9cd6104a Mon Sep 17 00:00:00 2001 From: steveri Date: Fri, 13 Jan 2023 11:18:17 -0800 Subject: [PATCH 63/63] last minute fix to improper code --- global_buffer/Makefile | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/global_buffer/Makefile b/global_buffer/Makefile index f57434b7b9..9310bb716f 100644 --- a/global_buffer/Makefile +++ b/global_buffer/Makefile @@ -58,20 +58,10 @@ SIM = sim ifeq ($(WHICH_SOC), amber) TESTVECTORS = testvectors_amber SIM = sim_amber -else endif TB_FILES ?= -F 
$(SIM)/tb_global_buffer.f -# test vectors -# To succeed, amber builds will need to set WHICH_SOC env var to "amber" -# onyx builds don't need to do anything new -ifeq ($(WHICH_SOC), amber) - TESTVECTORS = testvectors_amber -else - TESTVECTORS = testvectors -endif - # ------------------------------------------------------------------- # Commands # -------------------------------------------------------------------