Skip to content

Commit

Permalink
Add autotuner
Browse files Browse the repository at this point in the history
  • Loading branch information
Simon Boehm committed Jan 29, 2023
1 parent 55d0f9e commit b1b9714
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 13 deletions.
46 changes: 46 additions & 0 deletions scripts/kernel_9_autotuner.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/usr/bin/env bash
#
# Brute-force autotuner for SGEMM kernel 9.
#
# For every combination of the tile parameters listed below, this script
#   1. rewrites the `const uint K9_* = ...;` definitions in src/runner.cu,
#   2. rebuilds the project with ninja,
#   3. runs `./sgemm 9`,
# and appends the parameter line plus the benchmark output to
# scripts/kernel_9_autotune_results.txt.
#
# NOTE: runner.cu is edited in place (sed -i) and is left holding the last
# combination that was tried.

set -u

# Candidate values for each tunable parameter.
BK_VALUES="8 16 32 64"
TM_VALUES="4 8 16 32"
TN_VALUES="4 8 16 32"
BM_VALUES="64 128 256"
BN_VALUES="64 128 256"

# Every path below is relative to the build directory, so refuse to continue
# if it cannot be entered; otherwise sed/ninja would act on the wrong tree.
cd "$(dirname "$0")" || exit 1
cd "../build" || exit 1

RUNNER="../src/runner.cu"
OUTPUT="../scripts/kernel_9_autotune_results.txt"

if [[ ! -f "$RUNNER" ]]; then
  echo "error: cannot find $RUNNER" >&2
  exit 1
fi

# Loop through all combinations of parameters.
for bk in $BK_VALUES; do
  for tm in $TM_VALUES; do
    for tn in $TN_VALUES; do
      for bm in $BM_VALUES; do
        for bn in $BN_VALUES; do
          params="BK=$bk TM=$tm TN=$tn BM=$bm BN=$bn"

          # Update the parameters in the source code (single sed pass).
          sed -i \
            -e "s/const uint K9_BK = .*/const uint K9_BK = $bk;/" \
            -e "s/const uint K9_TM = .*/const uint K9_TM = $tm;/" \
            -e "s/const uint K9_TN = .*/const uint K9_TN = $tn;/" \
            -e "s/const uint K9_BM = .*/const uint K9_BM = $bm;/" \
            -e "s/const uint K9_BN = .*/const uint K9_BN = $bn;/" \
            "$RUNNER"

          # Rebuild the program. If the build fails, the ./sgemm binary on
          # disk still belongs to the previous combination; running it would
          # attribute stale numbers to these parameters, so skip instead.
          if ! ninja; then
            echo "$params BUILD FAILED, skipping" | tee -a "$OUTPUT"
            continue
          fi

          echo "$params" | tee -a "$OUTPUT"

          # Run the benchmark and record its output.
          ./sgemm 9 | tee -a "$OUTPUT"
        done
      done
    done
  done
done
26 changes: 13 additions & 13 deletions src/runner.cu
Original file line number Diff line number Diff line change
Expand Up @@ -288,24 +288,24 @@ void runSgemmResolveBankExtraCol(int M, int N, int K, float alpha, float *A,

// Host-side launcher for the sgemmAutotuned kernel.
//
// Picks compile-time tile sizes, derives the launch configuration from them
// and launches the kernel on the default stream:
//   - grid:  x = CEIL_DIV(N, <BN>), y = CEIL_DIV(M, <BM>)
//   - block: a single dimension of (<BM> * <BN>) / (<TM> * <TN>) threads
// M, N, K, alpha, A, B, beta and C are forwarded to the kernel unchanged.
//
// NOTE(review): this listing appears to be a diff rendering in which the
// removed lines (BK/TM/TN/BM/BN) and the added lines (K9_BK/K9_TM/...) are
// interleaved without +/- markers; presumably only the K9_* variants exist
// after the commit -- confirm against the actual file.
// The K9_* definitions are the lines rewritten by
// scripts/kernel_9_autotuner.sh via sed, so their exact
// `const uint K9_XX = <value>;` spelling must be preserved.
void runSgemmAutotuned(int M, int N, int K, float alpha, float *A, float *B,
float beta, float *C) {
const uint BK = 16;
const uint TM = 8;
const uint TN = 8;
const uint K9_BK = 16;
const uint K9_TM = 8;
const uint K9_TN = 8;
// Inputs with both M and N of at least 128 use 128x128 block tiles.
if (M >= 128 and N >= 128) {
const uint BM = 128;
const uint BN = 128;
dim3 gridDim(CEIL_DIV(N, BN), CEIL_DIV(M, BM));
dim3 blockDim((BM * BN) / (TM * TN));
sgemmAutotuned<BM, BN, BK, TM, TN>
const uint K9_BM = 128;
const uint K9_BN = 128;
dim3 gridDim(CEIL_DIV(N, K9_BN), CEIL_DIV(M, K9_BM));
dim3 blockDim((K9_BM * K9_BN) / (K9_TM * K9_TN));
sgemmAutotuned<K9_BM, K9_BN, K9_BK, K9_TM, K9_TN>
<<<gridDim, blockDim>>>(M, N, K, alpha, A, B, beta, C);
} else {
// this is a hacky solution to the underlying problem
// of not having proper bounds checking in the kernel
// (smaller inputs fall back to 64x64 block tiles)
const uint BM = 64;
const uint BN = 64;
dim3 gridDim(CEIL_DIV(N, BN), CEIL_DIV(M, BM));
dim3 blockDim((BM * BN) / (TM * TN));
sgemmAutotuned<BM, BN, BK, TM, TN>
const uint K9_BM = 64;
const uint K9_BN = 64;
dim3 gridDim(CEIL_DIV(N, K9_BN), CEIL_DIV(M, K9_BM));
dim3 blockDim((K9_BM * K9_BN) / (K9_TM * K9_TN));
sgemmAutotuned<K9_BM, K9_BN, K9_BK, K9_TM, K9_TN>
<<<gridDim, blockDim>>>(M, N, K, alpha, A, B, beta, C);
}
}
Expand Down

0 comments on commit b1b9714

Please sign in to comment.