Skip to content

Commit

Permalink
Separate pybindings
Browse files Browse the repository at this point in the history
  • Loading branch information
casper-hansen committed Feb 24, 2024
1 parent 365f8b3 commit c00bdad
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 4 deletions.
4 changes: 0 additions & 4 deletions awq_ext/pybind_awq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
#include "vllm/moe_alig_block.h"
#include "vllm/activation.h"
#include "vllm/topk_softmax_kernels.h"
#include "quantization_new/gemm/gemm_cuda.h"
#include "quantization_new/gemv/gemv_cuda.h"

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
{
Expand All @@ -22,6 +20,4 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
m.def("moe_alig_block_size", &moe_alig_block_size, "Aligning the number of tokens to be processed by each expert such that it is divisible by the block size.");
m.def("silu_and_mul", &silu_and_mul, "Activation function used in SwiGLU.");
m.def("topk_softmax", &topk_softmax, "Computes fused topk and softmax operation.");
m.def("gemm_forward_cuda_prefill", &gemm_forward_cuda_prefill, "New quantized GEMM kernel.");
m.def("gemv_forward_cuda_decode", &gemv_forward_cuda_decode, "New quantized GEMM kernel.");
}
10 changes: 10 additions & 0 deletions awq_ext/pybind_awq_v2cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#include <pybind11/pybind11.h>
#include <torch/extension.h>
#include "quantization_new/gemm/gemm_cuda.h"
#include "quantization_new/gemv/gemv_cuda.h"

// Python extension module exposing two functions, gemm_forward_cuda_prefill
// and gemv_forward_cuda_decode, under their C++ names. The module name is
// supplied by the TORCH_EXTENSION_NAME macro.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
{
    // Third argument of each def() is the docstring shown by Python's help().
    m.def("gemm_forward_cuda_prefill", &gemm_forward_cuda_prefill, "New quantized GEMM kernel.");
    // Docstring previously said "GEMM" (copy-paste from the line above); this
    // binding is the GEMV function, so label it as such.
    m.def("gemv_forward_cuda_decode", &gemv_forward_cuda_decode, "New quantized GEMV kernel.");
}

0 comments on commit c00bdad

Please sign in to comment.