From 3ec975c2da8d0534d1a251fccf22baa0eb37e949 Mon Sep 17 00:00:00 2001
From: "huangjintao.hjt"
Date: Fri, 30 Aug 2024 16:08:23 +0800
Subject: [PATCH 1/2] update docs

---
 ...\234\200\344\275\263\345\256\236\350\267\265.md"            | 13 +++++++++++++
 .../source_en/Multi-Modal/qwen2-vl-best-practice.md            | 13 +++++++++++++
 2 files changed, 26 insertions(+)

diff --git "a/docs/source/Multi-Modal/qwen2-vl\346\234\200\344\275\263\345\256\236\350\267\265.md" "b/docs/source/Multi-Modal/qwen2-vl\346\234\200\344\275\263\345\256\236\350\267\265.md"
index 710d09ed5..e44b4a23c 100644
--- "a/docs/source/Multi-Modal/qwen2-vl\346\234\200\344\275\263\345\256\236\350\267\265.md"
+++ "b/docs/source/Multi-Modal/qwen2-vl\346\234\200\344\275\263\345\256\236\350\267\265.md"
@@ -153,6 +153,19 @@ history: [['http://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/road
 
 ## 微调
 
+### 图像OCR微调
+我们使用 latex-ocr-print 数据集进行微调,该数据集的任务是进行Latex OCR。您可以在 modelscope 上找到该数据集: [https://modelscope.cn/datasets/AI-ModelScope/LaTeX_OCR](https://modelscope.cn/datasets/AI-ModelScope/LaTeX_OCR)
+
+```bash
+# 单卡A10/3090可运行 (OCR微调)
+# GPU Memory: 20GB
+SIZE_FACTOR=8 MAX_PIXELS=602112 CUDA_VISIBLE_DEVICES=0 swift sft \
+    --model_type qwen2-vl-7b-instruct \
+    --model_id_or_path qwen/Qwen2-VL-7B-Instruct \
+    --sft_type lora \
+    --dataset latex-ocr-print#20000
+```
+
 ### 图像描述微调
 我们使用 coco-en-mini 数据集进行微调,该数据集的任务是对图片内容进行描述。您可以在 modelscope 上找到该数据集: [https://modelscope.cn/datasets/modelscope/coco_2014_caption](https://modelscope.cn/datasets/modelscope/coco_2014_caption)
 
diff --git a/docs/source_en/Multi-Modal/qwen2-vl-best-practice.md b/docs/source_en/Multi-Modal/qwen2-vl-best-practice.md
index cbeff9ab8..c1ffe06fd 100644
--- a/docs/source_en/Multi-Modal/qwen2-vl-best-practice.md
+++ b/docs/source_en/Multi-Modal/qwen2-vl-best-practice.md
@@ -139,6 +139,19 @@ history: [['http://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/road
 
 ## Fine-tuning
 
+### Image OCR Fine-tuning
+We fine-tune using the latex-ocr-print dataset, whose task is LaTeX OCR. You can find this dataset on ModelScope: [https://modelscope.cn/datasets/AI-ModelScope/LaTeX_OCR](https://modelscope.cn/datasets/AI-ModelScope/LaTeX_OCR)
+
+```bash
+# Single-card A10/3090 can run (OCR fine-tuning).
+# GPU Memory: 20GB
+SIZE_FACTOR=8 MAX_PIXELS=602112 CUDA_VISIBLE_DEVICES=0 swift sft \
+    --model_type qwen2-vl-7b-instruct \
+    --model_id_or_path qwen/Qwen2-VL-7B-Instruct \
+    --sft_type lora \
+    --dataset latex-ocr-print#20000
+```
+
 ### Image Description Fine-tuning
 We fine-tune using the coco-en-mini dataset, which aims to describe the content of images. You can find this dataset on ModelScope: [https://modelscope.cn/datasets/modelscope/coco_2014_caption](https://modelscope.cn/datasets/modelscope/coco_2014_caption)
 
From b5d37b896b18cce4e52309c87a32512e570e8d7a Mon Sep 17 00:00:00 2001
From: "huangjintao.hjt"
Date: Fri, 30 Aug 2024 16:11:01 +0800
Subject: [PATCH 2/2] update

---
 ...qwen2-vl\346\234\200\344\275\263\345\256\236\350\267\265.md" | 2 +-
 docs/source_en/Multi-Modal/qwen2-vl-best-practice.md            | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git "a/docs/source/Multi-Modal/qwen2-vl\346\234\200\344\275\263\345\256\236\350\267\265.md" "b/docs/source/Multi-Modal/qwen2-vl\346\234\200\344\275\263\345\256\236\350\267\265.md"
index e44b4a23c..ea2cac885 100644
--- "a/docs/source/Multi-Modal/qwen2-vl\346\234\200\344\275\263\345\256\236\350\267\265.md"
+++ "b/docs/source/Multi-Modal/qwen2-vl\346\234\200\344\275\263\345\256\236\350\267\265.md"
@@ -157,7 +157,7 @@ history: [['http://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/road
 我们使用 latex-ocr-print 数据集进行微调,该数据集的任务是进行Latex OCR。您可以在 modelscope 上找到该数据集: [https://modelscope.cn/datasets/AI-ModelScope/LaTeX_OCR](https://modelscope.cn/datasets/AI-ModelScope/LaTeX_OCR)
 
 ```bash
-# 单卡A10/3090可运行 (OCR微调)
+# 单卡A10/3090可运行
 # GPU Memory: 20GB
 SIZE_FACTOR=8 MAX_PIXELS=602112 CUDA_VISIBLE_DEVICES=0 swift sft \
     --model_type qwen2-vl-7b-instruct \
diff --git a/docs/source_en/Multi-Modal/qwen2-vl-best-practice.md b/docs/source_en/Multi-Modal/qwen2-vl-best-practice.md
index c1ffe06fd..2304e153f 100644
--- a/docs/source_en/Multi-Modal/qwen2-vl-best-practice.md
+++ b/docs/source_en/Multi-Modal/qwen2-vl-best-practice.md
@@ -143,7 +143,7 @@ history: [['http://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/road
 We fine-tune using the latex-ocr-print dataset, whose task is LaTeX OCR. You can find this dataset on ModelScope: [https://modelscope.cn/datasets/AI-ModelScope/LaTeX_OCR](https://modelscope.cn/datasets/AI-ModelScope/LaTeX_OCR)
 
 ```bash
-# Single-card A10/3090 can run (OCR fine-tuning).
+# Single-card A10/3090 can run
 # GPU Memory: 20GB
 SIZE_FACTOR=8 MAX_PIXELS=602112 CUDA_VISIBLE_DEVICES=0 swift sft \
     --model_type qwen2-vl-7b-instruct \
     --model_id_or_path qwen/Qwen2-VL-7B-Instruct \
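
After the fine-tuning command above finishes, the LoRA checkpoint is written under the `output/` directory, and the best-practice docs patched here typically follow the training command with a `swift infer` call on that checkpoint. A minimal sketch, assuming ms-swift 2.x and a placeholder checkpoint path (`vx-xxx/checkpoint-xxx` stands for whatever run directory `swift sft` actually created):

```bash
# Hypothetical follow-up: run inference with the LoRA checkpoint produced by the
# OCR fine-tuning command above. The ckpt_dir below is a placeholder; replace
# vx-xxx/checkpoint-xxx with the directory written by your training run.
CUDA_VISIBLE_DEVICES=0 swift infer \
    --ckpt_dir output/qwen2-vl-7b-instruct/vx-xxx/checkpoint-xxx \
    --load_dataset_config true
```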