From b1b9714dbd857e7747ed8d77a2769f198a50bce9 Mon Sep 17 00:00:00 2001
From: Simon Boehm
Date: Sun, 29 Jan 2023 23:19:29 +0000
Subject: [PATCH] Add autotuner

Add scripts/kernel_9_autotuner.sh, which sweeps the tile parameters of
kernel 9: for each combination it rewrites the constants in
runSgemmAutotuned (src/runner.cu) with sed, rebuilds with ninja and
appends the output of `./sgemm 9` to a results file.

The constants in runSgemmAutotuned get a K9_ prefix, which is what the
sed patterns of the script match.

Combinations that need more than 1024 threads per block, and
combinations whose build fails, are skipped instead of being measured
with a stale binary.
---
Note: the post-image blob ids on the "index" lines are carried over from
the original mail and do not describe the revised file contents.

 scripts/kernel_9_autotuner.sh | 62 +++++++++++++++++++++++++++++++++++
 src/runner.cu                 | 28 ++++++++-------
 2 files changed, 77 insertions(+), 13 deletions(-)
 create mode 100755 scripts/kernel_9_autotuner.sh

diff --git a/scripts/kernel_9_autotuner.sh b/scripts/kernel_9_autotuner.sh
new file mode 100755
index 0000000..2594496
--- /dev/null
+++ b/scripts/kernel_9_autotuner.sh
@@ -0,0 +1,62 @@
+#!/usr/bin/env bash
+#
+# Brute-force autotuner for kernel 9: for every parameter combination,
+# rewrite the K9_* constants in src/runner.cu, rebuild with ninja and
+# append the output of `./sgemm 9` to kernel_9_autotune_results.txt.
+
+set -u
+
+# Define the range of values for each parameter
+BK_VALUES="8 16 32 64"
+TM_VALUES="4 8 16 32"
+TN_VALUES="4 8 16 32"
+BM_VALUES="64 128 256"
+BN_VALUES="64 128 256"
+
+# CUDA rejects kernel launches with more threads per block than this
+MAX_THREADS=1024
+
+# Every path below is relative to the build directory
+cd "$(dirname "$0")/../build" || exit 1
+
+RUNNER="../src/runner.cu"
+OUTPUT="../scripts/kernel_9_autotune_results.txt"
+
+# Loop through all combinations of parameters
+for bk in $BK_VALUES; do
+  for tm in $TM_VALUES; do
+    for tn in $TN_VALUES; do
+      for bm in $BM_VALUES; do
+        for bn in $BN_VALUES; do
+          config="BK=$bk TM=$tm TN=$tn BM=$bm BN=$bn"
+
+          # runner.cu launches (BM * BN) / (TM * TN) threads per block
+          threads=$(( (bm * bn) / (tm * tn) ))
+          if (( threads > MAX_THREADS )); then
+            echo "$config: skipped, $threads threads per block" >&2
+            continue
+          fi
+
+          # Update the parameters in the source code
+          sed -i "s/const uint K9_BK = .*/const uint K9_BK = $bk;/" "$RUNNER"
+          sed -i "s/const uint K9_TM = .*/const uint K9_TM = $tm;/" "$RUNNER"
+          sed -i "s/const uint K9_TN = .*/const uint K9_TN = $tn;/" "$RUNNER"
+          sed -i "s/const uint K9_BM = .*/const uint K9_BM = $bm;/" "$RUNNER"
+          sed -i "s/const uint K9_BN = .*/const uint K9_BN = $bn;/" "$RUNNER"
+
+          # Rebuild the program. After a failed build ./sgemm is still the
+          # previous binary, so its numbers must not be logged for $config.
+          if ! ninja; then
+            echo "$config: build failed, skipped" >&2
+            continue
+          fi
+
+          echo "$config" | tee -a "$OUTPUT"
+
+          # Run the benchmark and get the result
+          ./sgemm 9 | tee -a "$OUTPUT"
+        done
+      done
+    done
+  done
+done
diff --git a/src/runner.cu b/src/runner.cu
index 9a27f53..46aa9fb 100644
--- a/src/runner.cu
+++ b/src/runner.cu
@@ -288,24 +288,26 @@ void runSgemmResolveBankExtraCol(int M, int N, int K, float alpha, float *A,
 
 void runSgemmAutotuned(int M, int N, int K, float alpha, float *A, float *B,
                        float beta, float *C) {
-  const uint BK = 16;
-  const uint TM = 8;
-  const uint TN = 8;
+  // scripts/kernel_9_autotuner.sh rewrites the K9_* values below with sed,
+  // matching the declaration text: keep one such declaration per line.
+  const uint K9_BK = 16;
+  const uint K9_TM = 8;
+  const uint K9_TN = 8;
   if (M >= 128 and N >= 128) {
-    const uint BM = 128;
-    const uint BN = 128;
-    dim3 gridDim(CEIL_DIV(N, BN), CEIL_DIV(M, BM));
-    dim3 blockDim((BM * BN) / (TM * TN));
-    sgemmAutotuned<BM, BN, BK, TM, TN>
+    const uint K9_BM = 128;
+    const uint K9_BN = 128;
+    dim3 gridDim(CEIL_DIV(N, K9_BN), CEIL_DIV(M, K9_BM));
+    dim3 blockDim((K9_BM * K9_BN) / (K9_TM * K9_TN));
+    sgemmAutotuned<K9_BM, K9_BN, K9_BK, K9_TM, K9_TN>
         <<<gridDim, blockDim>>>(M, N, K, alpha, A, B, beta, C);
   } else {
     // this is a hacky solution to the underlying problem
     // of not having proper bounds checking in the kernel
-    const uint BM = 64;
-    const uint BN = 64;
-    dim3 gridDim(CEIL_DIV(N, BN), CEIL_DIV(M, BM));
-    dim3 blockDim((BM * BN) / (TM * TN));
-    sgemmAutotuned<BM, BN, BK, TM, TN>
+    const uint K9_BM = 64;
+    const uint K9_BN = 64;
+    dim3 gridDim(CEIL_DIV(N, K9_BN), CEIL_DIV(M, K9_BM));
+    dim3 blockDim((K9_BM * K9_BN) / (K9_TM * K9_TN));
+    sgemmAutotuned<K9_BM, K9_BN, K9_BK, K9_TM, K9_TN>
         <<<gridDim, blockDim>>>(M, N, K, alpha, A, B, beta, C);
   }
 }