From 987e21356d62fa7a5021623a6923e4bc5c202811 Mon Sep 17 00:00:00 2001
From: Lyalyushkin Nikolay
Date: Wed, 28 Feb 2024 11:49:40 +0100
Subject: [PATCH] Added int4 config for Mixtral-8x7B weight compression (#255)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Evaluated word perplexity of Mixtral-8x7B on wikitext and found an int4 config
for weight compression that has a negligible increase (0.4) in perplexity over
the original model.

| Mixtral 8x7B             | word_ppl on wikitext |
| ------------------------ | -------------------- |
| Torch CPU                | 5.17                 |
| OV CPU                   | 5.17                 |
| sym_g128_r100 (default)  | 5.98                 |
| sym_g128_r90             | 5.60                 |
| sym_g128_r80 (added)     | 5.55                 |

---
 llm_bench/python/utils/nncf_utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llm_bench/python/utils/nncf_utils.py b/llm_bench/python/utils/nncf_utils.py
index bf9721664..f5561e17d 100644
--- a/llm_bench/python/utils/nncf_utils.py
+++ b/llm_bench/python/utils/nncf_utils.py
@@ -47,4 +47,5 @@ def get_compressed_path(output_dir: str, base_precision, option: str):
     "open-llama-3b": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 64, "all_layers": True},
     "falcon-7b-instruct": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 64, "all_layers": True},
     "orca-mini-3b": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 64, "all_layers": True},
+    "mixtral-8x7b-v0.1": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 128, "ratio": 0.8},
 }
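
For context, a minimal sketch of how a config entry like the one added here is typically consumed: the keyword arguments map directly onto NNCF's `nncf.compress_weights` API, where `ratio=0.8` keeps roughly 80% of the weight layers in int4 and leaves the rest in the int8 backup precision (the `sym_g128_r80` row in the table above). The model path and OpenVINO loading code below are illustrative assumptions, not part of this PR, which only adds the dictionary entry.

```python
# Sketch: applying the added Mixtral-8x7B config via NNCF weight compression.
# The model path is hypothetical; only the compression parameters come from
# the config entry added in this PR.
import nncf
import openvino as ov

core = ov.Core()
model = core.read_model("mixtral-8x7b-v0.1/openvino_model.xml")  # hypothetical path

# INT4 symmetric quantization with 128-element groups; 80% of weight layers
# are compressed to int4, the remaining 20% stay in int8 backup precision.
compressed = nncf.compress_weights(
    model,
    mode=nncf.CompressWeightsMode.INT4_SYM,
    group_size=128,
    ratio=0.8,
)
ov.save_model(compressed, "mixtral-8x7b-v0.1/int4_sym_g128_r80/openvino_model.xml")
```

The table suggests why `ratio=0.8` was chosen over the default full-int4 setting: dropping the ratio from 1.0 to 0.8 recovers most of the perplexity gap (5.98 to 5.55 against the 5.17 baseline) at the cost of slightly less compression.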