Better CLI + RunPod Script (#552)
casper-hansen authored Jul 23, 2024
1 parent b55f73c commit ca54dea
Showing 2 changed files with 33 additions and 17 deletions.
13 changes: 10 additions & 3 deletions examples/cli.py
@@ -20,8 +20,9 @@ def main():
     parser.add_argument("--no-low_cpu_mem_usage", action="store_false", dest="low_cpu_mem_usage", help="Don't use low CPU memory")
     parser.add_argument("--use_cache", action="store_true", help="Use cache")
     parser.add_argument("--no-use_cache", action="store_false", dest="use_cache", help="Don't use cache")
+    parser.add_argument("--device_map", type=str, default=None, help="Device map for loading the pretrained model")
 
-    parser.set_defaults(zero_point=True, low_cpu_mem_usage=True, use_cache=False)
+    parser.set_defaults(zero_point=True, low_cpu_mem_usage=True, use_cache=None)
 
     args = parser.parse_args()
 
@@ -34,11 +35,17 @@ def main():
 
     model_config = {
         "low_cpu_mem_usage": args.low_cpu_mem_usage,
-        "use_cache": args.use_cache
     }
 
+    if args.use_cache is not None:
+        model_config["use_cache"] = args.use_cache
+
     print(f"Loading model from: {args.hf_model_path}")
-    model = AutoAWQForCausalLM.from_pretrained(args.hf_model_path, **model_config)
+    model = AutoAWQForCausalLM.from_pretrained(
+        args.hf_model_path,
+        device_map=args.device_map,
+        **model_config
+    )
     tokenizer = AutoTokenizer.from_pretrained(args.hf_model_path, trust_remote_code=True)
 
     print(f"Quantizing model with config: {quant_config}")
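
Note on the cli.py changes: --device_map is forwarded straight to AutoAWQForCausalLM.from_pretrained (e.g. --device_map auto for automatic placement), and use_cache becomes tri-state: its default moves from False to None, so the model's own config is only overridden when --use_cache or --no-use_cache is passed explicitly. A minimal standalone sketch of that tri-state flag pattern (illustration only, not part of the commit):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--use_cache", action="store_true", help="Use cache")
parser.add_argument("--no-use_cache", action="store_false", dest="use_cache", help="Don't use cache")
parser.set_defaults(use_cache=None)  # None = neither flag was given
args = parser.parse_args()

model_config = {}
if args.use_cache is not None:
    # only override the model's own setting on an explicit flag
    model_config["use_cache"] = args.use_cache
print(model_config)

Run with no flags this prints {}; --use_cache prints {'use_cache': True}; --no-use_cache prints {'use_cache': False}.
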
37 changes: 23 additions & 14 deletions scripts/runpod_quantize.py
@@ -23,14 +23,19 @@
 gpu_id = gpu_ids["4090"]
 num_gpus = 1
 system_memory_gb = 100
-system_storage_gb = 20 # fp16 model is downloaded here
-volume_storage_gb = 20 # quantized model is saved here
+system_storage_gb = 150 # fp16 model is downloaded here
+volume_storage_gb = 50 # quantized model is saved here
 
 # Quantization Parameters
 hf_model_path = "Qwen/Qwen2-0.5B-Instruct"
 quant_name = "qwen2-0.5b-instruct-awq"
 local_save_path = f"/workspace/{quant_name}"
 hf_upload_path = f"casperhansen/{quant_name}"
+INSTALL_TRANSFORMERS_MAIN = False
+USE_HF_TRANSFER = False
+
+if USE_HF_TRANSFER:
+    env_variables["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
 
 cli_args = dict(
     hf_model_path = hf_model_path,
@@ -45,18 +50,22 @@
 )
 cli_args = " ".join([f"--{k}" if isinstance(v, bool) else f"--{k} {v}" for k,v in cli_args.items()])
 
-docker_command = (
-    "bash -c '" +
-    "cd /workspace && " +
-    "git clone https://github.com/casper-hansen/AutoAWQ.git && " +
-    "cd AutoAWQ && " +
-    "pip install -e . && " +
-    "huggingface-cli login --token $HF_TOKEN && " +
-    f"python examples/cli.py {cli_args} && " +
-    f"huggingface-cli upload {hf_upload_path} {local_save_path} ./ && " +
-    "runpodctl stop pod $RUNPOD_POD_ID" +
-    "'"
-)
+commands = [
+    "cd /workspace",
+    "git clone https://github.com/casper-hansen/AutoAWQ.git",
+    "cd AutoAWQ",
+    "pip install -e .",
+    "pip install -U git+https://github.com/huggingface/transformers.git" if INSTALL_TRANSFORMERS_MAIN else "",
+    "pip install hf-transfer" if USE_HF_TRANSFER else "",
+    "huggingface-cli login --token $HF_TOKEN",
+    f"python examples/cli.py {cli_args}",
+    f"huggingface-cli upload {hf_upload_path} {local_save_path} ./",
+    "runpodctl stop pod $RUNPOD_POD_ID",
+]
+commands = [cmd for cmd in commands if cmd]
+commands = " && ".join(commands)
+
+docker_command = "bash -c '" + commands + "'"
 
 template = runpod.create_template(
     name=template_name,
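
Note on the runpod_quantize.py changes: assembling docker_command from a list makes optional steps cheap to toggle: entries gated on INSTALL_TRANSFORMERS_MAIN or USE_HF_TRANSFER evaluate to an empty string when disabled and are filtered out before joining with " && ". A standalone sketch with hypothetical steps showing how the final command comes together:

INSTALL_TRANSFORMERS_MAIN = False
USE_HF_TRANSFER = True

commands = [
    "cd /workspace",
    "pip install -U git+https://github.com/huggingface/transformers.git" if INSTALL_TRANSFORMERS_MAIN else "",
    "pip install hf-transfer" if USE_HF_TRANSFER else "",
    "echo done",
]
commands = [cmd for cmd in commands if cmd]  # drop disabled (empty) steps
docker_command = "bash -c '" + " && ".join(commands) + "'"
print(docker_command)
# bash -c 'cd /workspace && pip install hf-transfer && echo done'

Setting HF_HUB_ENABLE_HF_TRANSFER=1 only takes effect once the hf-transfer package is installed, which is why the env variable and the pip install are gated by the same USE_HF_TRANSFER switch.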
