From 987e21356d62fa7a5021623a6923e4bc5c202811 Mon Sep 17 00:00:00 2001
From: Lyalyushkin Nikolay
Date: Wed, 28 Feb 2024 11:49:40 +0100
Subject: [PATCH] Added int4 config for Mixtral-8x7B weight compression (#255)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Evaluated word perplexity of Mixtral-8x7B on wikitext and found an int4 config
for weight compression that has a negligible increase (0.4) in perplexity over
the original model.

| Mixtral 8x7B             | word_ppl on wikitext |
| ------------------------ | -------------------- |
| Torch CPU                | 5.17                 |
| OV CPU                   | 5.17                 |
| sym_g128_r100 (default)  | 5.98                 |
| sym_g128_r90             | 5.60                 |
| sym_g128_r80 (added)     | 5.55                 |

---
 llm_bench/python/utils/nncf_utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llm_bench/python/utils/nncf_utils.py b/llm_bench/python/utils/nncf_utils.py
index bf9721664..f5561e17d 100644
--- a/llm_bench/python/utils/nncf_utils.py
+++ b/llm_bench/python/utils/nncf_utils.py
@@ -47,4 +47,5 @@ def get_compressed_path(output_dir: str, base_precision, option: str):
     "open-llama-3b": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 64, "all_layers": True},
     "falcon-7b-instruct": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 64, "all_layers": True},
     "orca-mini-3b": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 64, "all_layers": True},
+    "mixtral-8x7b-v0.1": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 128, "ratio": 0.8},
 }
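
For context, a minimal sketch of how a config entry like the one added here is typically consumed: the keyword arguments map directly onto NNCF's `nncf.compress_weights` API, where `ratio=0.8` keeps roughly 80% of the weight layers in int4 and leaves the rest in the int8 backup precision (the `sym_g128_r80` row in the table above). The model path and OpenVINO loading code below are illustrative assumptions, not part of this PR, which only adds the dictionary entry.

```python
# Sketch: applying the added Mixtral-8x7B config via NNCF weight compression.
# The model path is hypothetical; only the compression parameters come from
# the config entry added in this PR.
import nncf
import openvino as ov

core = ov.Core()
model = core.read_model("mixtral-8x7b-v0.1/openvino_model.xml")  # hypothetical path

# INT4 symmetric quantization with 128-element groups; 80% of weight layers
# are compressed to int4, the remaining 20% stay in int8 backup precision.
compressed = nncf.compress_weights(
    model,
    mode=nncf.CompressWeightsMode.INT4_SYM,
    group_size=128,
    ratio=0.8,
)
ov.save_model(compressed, "mixtral-8x7b-v0.1/int4_sym_g128_r80/openvino_model.xml")
```

The table suggests why `ratio=0.8` was chosen over the default full-int4 setting: dropping the ratio from 1.0 to 0.8 recovers most of the perplexity gap (5.98 to 5.55 against the 5.17 baseline) at the cost of slightly less compression.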