From 216bf472a779cd9b8ced8b4ec630b39359c090b7 Mon Sep 17 00:00:00 2001
From: Bihan Rana
Date: Thu, 5 Sep 2024 21:27:08 +0545
Subject: [PATCH] Add TPU examples with optimum-tpu and vLLM

---
 docs/docs/concepts/fleets.md | 4 +-
 docs/examples/accelerators/tpu/index.md | 0
 examples/accelerators/tpu/README.md | 199 ++++++++++++++++++
 examples/deployment/optimum-tpu/.dstack.yml | 18 ++
 .../deployment/optimum-tpu/service.dstack.yml | 28 +++
 .../deployment/optimum-tpu/task.dstack.yml | 23 ++
 .../deployment/vllm/service-tpu.dstack.yml | 40 ++++
 .../optimum-tpu/llama31/.dstack.yml | 31 +++
 .../optimum-tpu/llama31/config.yaml | 10 +
 .../optimum-tpu/llama31/train.dstack.yml | 25 +++
 .../fine-tuning/optimum-tpu/llama31/train.py | 140 ++++++++++++
 mkdocs.yml | 1 +
 12 files changed, 517 insertions(+), 2 deletions(-)
 create mode 100644 docs/examples/accelerators/tpu/index.md
 create mode 100644 examples/accelerators/tpu/README.md
 create mode 100644 examples/deployment/optimum-tpu/.dstack.yml
 create mode 100644 examples/deployment/optimum-tpu/service.dstack.yml
 create mode 100644 examples/deployment/optimum-tpu/task.dstack.yml
 create mode 100644 examples/deployment/vllm/service-tpu.dstack.yml
 create mode 100644 examples/fine-tuning/optimum-tpu/llama31/.dstack.yml
 create mode 100644 examples/fine-tuning/optimum-tpu/llama31/config.yaml
 create mode 100644 examples/fine-tuning/optimum-tpu/llama31/train.dstack.yml
 create mode 100644 examples/fine-tuning/optimum-tpu/llama31/train.py

diff --git a/docs/docs/concepts/fleets.md b/docs/docs/concepts/fleets.md
index 76eb56c56..f9639ddb9 100644
--- a/docs/docs/concepts/fleets.md
+++ b/docs/docs/concepts/fleets.md
@@ -223,8 +223,8 @@ you can set the [`termination_idle_time`](../reference/dstack.yml/fleet.md#termi
 
 ## What's next?
 
-1. Read about [dev environments](dev-environments.md), [tasks](tasks.md), and
-   [services](services.md)
+1. Read about [dev environments](../dev-environments.md), [tasks](../tasks.md), and
+   [services](../services.md)
 2. Join the community via [Discord :material-arrow-top-right-thin:{ .external }](https://discord.gg/u8SmfwPpMd)
 
 !!! info "Reference"
diff --git a/docs/examples/accelerators/tpu/index.md b/docs/examples/accelerators/tpu/index.md
new file mode 100644
index 000000000..e69de29bb
diff --git a/examples/accelerators/tpu/README.md b/examples/accelerators/tpu/README.md
new file mode 100644
index 000000000..52abefaa6
--- /dev/null
+++ b/examples/accelerators/tpu/README.md
@@ -0,0 +1,199 @@
+# TPU
+
+If you're using the `gcp` backend, you can use TPUs. Just specify the TPU version and the number of cores
+(separated by a dash) in the `gpu` property under `resources`.
+
+> Currently, a maximum of 8 TPU cores can be specified, so the largest supported values are `v2-8`, `v3-8`, `v4-8`, `v5litepod-8`,
+> and `v5e-8`. Multi-host TPU support, allowing for larger numbers of cores, is coming soon.
+
+Below are a few examples of using TPUs for deployment and fine-tuning.
+
+## Deployment
+
+### Running as a service
+You can use any serving framework, such as vLLM or TGI. Here's an example of a [service](https://dstack.ai/docs/services) that deploys
+Llama 3.1 8B using
+[Optimum TPU :material-arrow-top-right-thin:{ .external }](https://github.com/huggingface/optimum-tpu){:target="_blank"}
+and [vLLM :material-arrow-top-right-thin:{ .external }](https://github.com/vllm-project/vllm){:target="_blank"}.
+
+=== "Optimum TPU"
+ + ```yaml + type: service + name: llama31-service-optimum-tpu + + image: dstackai/optimum-tpu:llama31 + env: + - HUGGING_FACE_HUB_TOKEN + - MODEL_ID=meta-llama/Meta-Llama-3.1-8B-Instruct + - MAX_TOTAL_TOKENS=4096 + - MAX_BATCH_PREFILL_TOKENS=4095 + commands: + - text-generation-launcher --port 8000 + port: 8000 + + spot_policy: auto + resources: + gpu: v5litepod-4 + + model: + format: tgi + type: chat + name: meta-llama/Meta-Llama-3.1-8B-Instruct + ``` +

    Note that Optimum TPU sets `MAX_INPUT_TOKEN` to 4095 by default, so we must set `MAX_BATCH_PREFILL_TOKENS` to 4095.

    ??? info "Docker image"
        The official Docker image `huggingface/optimum-tpu:latest` doesn’t support Llama 3.1 8B.
        We’ve created a custom image with the fix: `dstackai/optimum-tpu:llama31`.
        Once the [pull request :material-arrow-top-right-thin:{ .external }](https://github.com/huggingface/optimum-tpu/pull/87){:target="_blank"} is merged,
        the official Docker image can be used.

=== "vLLM"
+ + ```yaml + type: service + name: llama31-service-vllm-tpu + + env: + - MODEL_ID=meta-llama/Meta-Llama-3.1-8B-Instruct + - HUGGING_FACE_HUB_TOKEN + - DATE=20240828 + - TORCH_VERSION=2.5.0 + - VLLM_TARGET_DEVICE=tpu + - MAX_MODEL_LEN=4096 + commands: + - pip install https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch-${TORCH_VERSION}.dev${DATE}-cp311-cp311-linux_x86_64.whl + - pip3 install https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-${TORCH_VERSION}.dev${DATE}-cp311-cp311-linux_x86_64.whl + - pip install torch_xla[tpu] -f https://storage.googleapis.com/libtpu-releases/index.html + - pip install torch_xla[pallas] -f https://storage.googleapis.com/jax-releases/jax_nightly_releases.html -f https://storage.googleapis.com/jax-releases/jaxlib_nightly_releases.html + - git clone https://github.com/vllm-project/vllm.git + - cd vllm + - pip install -r requirements-tpu.txt + - apt-get install -y libopenblas-base libopenmpi-dev libomp-dev + - python setup.py develop + - vllm serve $MODEL_ID + --tensor-parallel-size 4 + --max-model-len $MAX_MODEL_LEN + --port 8000 + port: + - 8000 + + spot_policy: auto + resources: + gpu: v5litepod-4 + + model: + format: openai + type: chat + name: meta-llama/Meta-Llama-3.1-8B-Instruct + ``` +

    Note that when using Llama 3.1 8B with a `v5litepod`, which has 16GB of memory per core, we must limit the context size to 4096 tokens to fit into memory.

### Memory requirements

Below are the approximate memory requirements for serving LLMs with their corresponding TPUs.

| Model size | bfloat16 | TPU          | int8  | TPU          |
|------------|----------|--------------|-------|--------------|
| **8B**     | 16GB     | v5litepod-4  | 8GB   | v5litepod-4  |
| **70B**    | 140GB    | v5litepod-16 | 70GB  | v5litepod-16 |
| **405B**   | 810GB    | v5litepod-64 | 405GB | v5litepod-64 |

Note that `v5litepod` is optimized for serving transformer-based models. Each core is equipped with 16GB of memory.

### Supported frameworks

| Framework | Quantization   | Note                                                                                                                                                                                                                                                                                              |
|-----------|----------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| **TGI**   | bfloat16       | To deploy with TGI, Optimum TPU must be used.                                                                                                                                                                                                                                                     |
| **vLLM**  | int8, bfloat16 | int8 quantization still requires the same memory because the weights are first moved to the TPU in bfloat16, and then converted to int8. See the [pull request :material-arrow-top-right-thin:{ .external }](https://github.com/vllm-project/vllm/pull/7005){:target="_blank"} for more details. |

### Running a configuration

Once the configuration is ready, run `dstack apply -f <configuration file>`, and `dstack` will automatically provision the
cloud resources and run the configuration.

## Fine-tuning with Optimum TPU

Below is an example of fine-tuning Llama 3.1 8B using [Optimum TPU :material-arrow-top-right-thin:{ .external }](https://github.com/huggingface/optimum-tpu){:target="_blank"}
and the [Abirate/english_quotes :material-arrow-top-right-thin:{ .external }](https://huggingface.co/datasets/Abirate/english_quotes){:target="_blank"}
dataset.
+ +```yaml +type: task +name: optimum-tpu-llama-train + +python: "3.11" + +env: + - HUGGING_FACE_HUB_TOKEN +commands: + - git clone -b add_llama_31_support https://github.com/dstackai/optimum-tpu.git + - mkdir -p optimum-tpu/examples/custom/ + - cp examples/fine-tuning/optimum-tpu/llama31/train.py optimum-tpu/examples/custom/train.py + - cp examples/fine-tuning/optimum-tpu/llama31/config.yaml optimum-tpu/examples/custom/config.yaml + - cd optimum-tpu + - pip install -e . -f https://storage.googleapis.com/libtpu-releases/index.html + - pip install datasets evaluate + - pip install accelerate -U + - pip install peft + - python examples/custom/train.py examples/custom/config.yaml + + +resources: + gpu: v5litepod-8 +``` + +
+ +[//]: # (### Fine-Tuning with TRL) +[//]: # (Use the example `examples/fine-tuning/optimum-tpu/gemma/train.dstack.yml` to Finetune `Gemma-2B` model using `trl` with `dstack` and `optimum-tpu`. ) + +### Memory requirements + +Below are the approximate memory requirements for fine-tuning LLMs with their corresponding TPUs. + +| Model size | LoRA | TPU | +|------------|-------|--------------| +| **8B** | 16GB | v5litepod-8 | +| **70B** | 160GB | v5litepod-16 | +| **405B** | 950GB | v5litepod-64 | + +Note, `v5litepod` is optimized for fine-tuning transformer-based models. Each core is equipped with 16GB of memory. + +### Supported frameworks + +| Framework | Quantization | Note | +|-----------------|--------------|---------------------------------------------------------------------------------------------------| +| **TRL** | bfloat16 | To fine-tune using TRL, Optimum TPU is recommended. TRL doesn't support Llama 3.1 out of the box. | +| **Pytorch XLA** | bfloat16 | | + +## Dev environments + +Before running a task or service, it's recommended that you first start with +a [dev environment](https://dstack.ai/docs/dev-environments). Dev environments +allow you to run commands interactively. + +## Source code + +The source-code of this example can be found in +[examples/deployment/optimum-tpu :material-arrow-top-right-thin:{ .external }](https://github.com/dstackai/dstack/blob/master/examples/llms/llama31){:target="_blank"} +and [examples/fine-tuning/optimum-tpu :material-arrow-top-right-thin:{ .external }](https://github.com/dstackai/dstack/blob/master/examples/fine-tuning/trl){:target="_blank"}. + +## What's next? + +1. Browse [Optimum TPU :material-arrow-top-right-thin:{ .external }](https://github.com/huggingface/optimum-tpu), + [Optimum TPU TGI :material-arrow-top-right-thin:{ .external }](https://github.com/huggingface/optimum-tpu/tree/main/text-generation-inference) and + [vLLM :material-arrow-top-right-thin:{ .external }](https://docs.vllm.ai/en/latest/getting_started/tpu-installation.html). +2. Check [dev environments](https://dstack.ai/docs/dev-environments), [tasks](https://dstack.ai/docs/tasks), + [services](https://dstack.ai/docs/services), and [fleets](https://dstack.ai/docs/fleets). 
diff --git a/examples/deployment/optimum-tpu/.dstack.yml b/examples/deployment/optimum-tpu/.dstack.yml new file mode 100644 index 000000000..f34d3e9bb --- /dev/null +++ b/examples/deployment/optimum-tpu/.dstack.yml @@ -0,0 +1,18 @@ +type: dev-environment +# The name is optional, if not specified, generated randomly +name: vscode-optimum-tpu + +# Using a Docker image with a fix instead of the official one +# More details at https://github.com/huggingface/optimum-tpu/pull/87 +image: dstackai/optimum-tpu:llama31 +# Required environment variables +env: + - HUGGING_FACE_HUB_TOKEN +ide: vscode + +resources: + # Required resources + gpu: v5litepod-4 + +# Use either spot or on-demand instances +spot_policy: auto diff --git a/examples/deployment/optimum-tpu/service.dstack.yml b/examples/deployment/optimum-tpu/service.dstack.yml new file mode 100644 index 000000000..1b9ad8db3 --- /dev/null +++ b/examples/deployment/optimum-tpu/service.dstack.yml @@ -0,0 +1,28 @@ +type: service +# The name is optional, if not specified, generated randomly +name: llama31-service-optimum-tpu + +# Using a Docker image with a fix instead of the official one +# More details at https://github.com/huggingface/optimum-tpu/pull/87 +image: dstackai/optimum-tpu:llama31 +# Required environment variables +env: + - HUGGING_FACE_HUB_TOKEN + - MODEL_ID=meta-llama/Meta-Llama-3.1-8B-Instruct + - MAX_TOTAL_TOKENS=4096 + - MAX_BATCH_PREFILL_TOKENS=4095 +commands: + - text-generation-launcher --port 8000 +port: 8000 + +resources: + # Required resources + gpu: v5litepod-4 + +# Use either spot or on-demand instances +spot_policy: auto + +model: + format: tgi + type: chat + name: meta-llama/Meta-Llama-3.1-8B-Instruct \ No newline at end of file diff --git a/examples/deployment/optimum-tpu/task.dstack.yml b/examples/deployment/optimum-tpu/task.dstack.yml new file mode 100644 index 000000000..8a581e14b --- /dev/null +++ b/examples/deployment/optimum-tpu/task.dstack.yml @@ -0,0 +1,23 @@ +type: task +# The name is optional, if not specified, generated randomly +name: llama31-task-optimum-tpu + +# Using a Docker image with a fix instead of the official one +# More details at https://github.com/huggingface/optimum-tpu/pull/87 +image: dstackai/optimum-tpu:llama31 +# Required environment variables +env: + - HUGGING_FACE_HUB_TOKEN + - MODEL_ID=meta-llama/Meta-Llama-3.1-8B-Instruct + - MAX_TOTAL_TOKENS=4096 + - MAX_BATCH_PREFILL_TOKENS=4095 +commands: + - text-generation-launcher --port 8000 +ports: [8000] + +resources: + # Required resources + gpu: v5litepod-4 + +# Use either spot or on-demand instances +spot_policy: auto \ No newline at end of file diff --git a/examples/deployment/vllm/service-tpu.dstack.yml b/examples/deployment/vllm/service-tpu.dstack.yml new file mode 100644 index 000000000..230a1c539 --- /dev/null +++ b/examples/deployment/vllm/service-tpu.dstack.yml @@ -0,0 +1,40 @@ +type: service +# The name is optional, if not specified, generated randomly +name: llama31-service-vllm-tpu + +env: + - HUGGING_FACE_HUB_TOKEN + - MODEL_ID=meta-llama/Meta-Llama-3.1-8B-Instruct + - DATE=20240828 + - TORCH_VERSION=2.5.0 + - VLLM_TARGET_DEVICE=tpu + - MAX_MODEL_LEN=4096 + +commands: + - pip install https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch-${TORCH_VERSION}.dev${DATE}-cp311-cp311-linux_x86_64.whl + - pip3 install https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-${TORCH_VERSION}.dev${DATE}-cp311-cp311-linux_x86_64.whl + - pip install torch_xla[tpu] -f 
https://storage.googleapis.com/libtpu-releases/index.html
+  - pip install torch_xla[pallas] -f https://storage.googleapis.com/jax-releases/jax_nightly_releases.html -f https://storage.googleapis.com/jax-releases/jaxlib_nightly_releases.html
+  - git clone https://github.com/vllm-project/vllm.git
+  - cd vllm
+  - pip install -r requirements-tpu.txt
+  - apt-get install -y libopenblas-base libopenmpi-dev libomp-dev
+  - python setup.py develop
+  - vllm serve $MODEL_ID
+    --tensor-parallel-size 4
+    --max-model-len $MAX_MODEL_LEN
+    --port 8000
+
+# Expose the vllm server port
+port: 8000
+
+spot_policy: auto
+
+resources:
+  gpu: v5litepod-4
+
+# (Optional) Enable the OpenAI-compatible endpoint
+model:
+  format: openai
+  type: chat
+  name: meta-llama/Meta-Llama-3.1-8B-Instruct
\ No newline at end of file
diff --git a/examples/fine-tuning/optimum-tpu/llama31/.dstack.yml b/examples/fine-tuning/optimum-tpu/llama31/.dstack.yml
new file mode 100644
index 000000000..8dc522e0e
--- /dev/null
+++ b/examples/fine-tuning/optimum-tpu/llama31/.dstack.yml
@@ -0,0 +1,31 @@
+type: dev-environment
+# The name is optional, if not specified, generated randomly
+name: optimum-tpu-vscode

+# If `image` is not specified, dstack uses its default image
+python: "3.11"
+
+# Required environment variables
+env:
+  - HUGGING_FACE_HUB_TOKEN
+
+# Refer to the Note section in examples/accelerators/tpu/README.md for more information about the optimum-tpu repository.
+# Uncomment if you want the dependencies to be pre-installed
+#init:
+#  - git clone -b add_llama_31_support https://github.com/dstackai/optimum-tpu.git
+#  - mkdir -p optimum-tpu/examples/custom/
+#  - cp examples/fine-tuning/optimum-tpu/llama31/train.py optimum-tpu/examples/custom/train.py
+#  - cp examples/fine-tuning/optimum-tpu/llama31/config.yaml optimum-tpu/examples/custom/config.yaml
+#  - cd optimum-tpu
+#  - pip install -e . 
-f https://storage.googleapis.com/libtpu-releases/index.html +# - pip install datasets evaluate +# - pip install accelerate -U +# - pip install peft + +ide: vscode + +# Use either spot or on-demand instances +spot_policy: auto + +resources: + gpu: v5litepod-8 \ No newline at end of file diff --git a/examples/fine-tuning/optimum-tpu/llama31/config.yaml b/examples/fine-tuning/optimum-tpu/llama31/config.yaml new file mode 100644 index 000000000..4b0aea529 --- /dev/null +++ b/examples/fine-tuning/optimum-tpu/llama31/config.yaml @@ -0,0 +1,10 @@ +per_device_train_batch_size: 24 +per_device_eval_batch_size: 8 +num_train_epochs: 1 +max_steps: -1 +output_dir: "./finetuned_models/llama3_fine_tuned" +optim: "adafactor" +dataset_name: "Abirate/english_quotes" +model_name: "meta-llama/Meta-Llama-3.1-8B" +lora_r: 4 +push_to_hub: True \ No newline at end of file diff --git a/examples/fine-tuning/optimum-tpu/llama31/train.dstack.yml b/examples/fine-tuning/optimum-tpu/llama31/train.dstack.yml new file mode 100644 index 000000000..04fdfb744 --- /dev/null +++ b/examples/fine-tuning/optimum-tpu/llama31/train.dstack.yml @@ -0,0 +1,25 @@ +type: task +# The name is optional, if not specified, generated randomly +name: optimum-tpu-llama-train + +python: "3.11" + +# Required environment variables +env: + - HUGGING_FACE_HUB_TOKEN + +# Commands of the task +commands: + - git clone -b add_llama_31_support https://github.com/dstackai/optimum-tpu.git + - mkdir -p optimum-tpu/examples/custom/ + - cp examples/fine-tuning/optimum-tpu/llama31/train.py optimum-tpu/examples/custom/train.py + - cp examples/fine-tuning/optimum-tpu/llama31/config.yaml optimum-tpu/examples/custom/config.yaml + - cd optimum-tpu + - pip install -e . -f https://storage.googleapis.com/libtpu-releases/index.html + - pip install datasets evaluate + - pip install accelerate -U + - pip install peft + - python examples/custom/train.py examples/custom/config.yaml + +resources: + gpu: v5litepod-8 \ No newline at end of file diff --git a/examples/fine-tuning/optimum-tpu/llama31/train.py b/examples/fine-tuning/optimum-tpu/llama31/train.py new file mode 100644 index 000000000..0c8c8a614 --- /dev/null +++ b/examples/fine-tuning/optimum-tpu/llama31/train.py @@ -0,0 +1,140 @@ +from dataclasses import dataclass, field +from typing import Optional + +from datasets import load_dataset +from optimum.tpu import AutoModelForCausalLM, fsdp_v2 +from peft import LoraConfig, TaskType, get_peft_model +from transformers import ( + AutoTokenizer, + DataCollatorForLanguageModeling, + HfArgumentParser, + Trainer, + TrainingArguments, +) + + +@dataclass +class ScriptArguments: + per_device_train_batch_size: Optional[int] = field( + default=8, metadata={"help": "Batch size per device for training."} + ) + per_device_eval_batch_size: Optional[int] = field( + default=8, metadata={"help": "Batch size per device for evaluation."} + ) + num_train_epochs: Optional[int] = field( + default=1, + metadata={"help": "The number of training epochs for the SFTTrainer."}, + ) + max_steps: int = field( + default=-1, metadata={"help": "How many optimizer update steps to take"} + ) + output_dir: str = field( + default="./results", + metadata={ + "help": "The output directory where the model predictions and checkpoints will be written." 
+ }, + ) + optim: Optional[str] = field( + default="adafactor", + metadata={"help": "The optimizer to use."}, + ) + dataset_name: Optional[str] = field( + default="Abirate/english_quotes", + metadata={"help": "The dataset to use."}, + ) + model_name: Optional[str] = field( + default="meta-llama/Meta-Llama-3.1-8B", + metadata={ + "help": "Only models Gemma 2B, Gemma 7B, Llama-2 7B and Llama-3 8B Llama-3.1 8B are tested with TPU v5e" + }, + ) + lora_r: Optional[int] = field(default=4, metadata={"help": "LoRA attention dimension."}) + max_seq_length: Optional[int] = field( + default=1024, metadata={"help": "Maximum sequence length to use."} + ) + packing: Optional[bool] = field( + default=True, + metadata={"help": "Use packing dataset creating."}, + ) + push_to_hub: Optional[bool] = field( + default=True, + metadata={"help": "Push fined tuned model to hub."}, + ) + + +def create_and_prepare_model(args): + base_model = AutoModelForCausalLM.from_pretrained(args.model_name) + lora_config = LoraConfig( + r=args.lora_r, # the dimension of the low-rank matrices + lora_alpha=8, # scaling factor for LoRA activations vs pre-trained weight activations + lora_dropout=0.05, + bias="none", + inference_mode=False, + task_type=TaskType.CAUSAL_LM, + target_modules=["o_proj", "v_proj"], + ) # + + model = get_peft_model(base_model, lora_config) + tokenizer = AutoTokenizer.from_pretrained(args.model_name) + # Add custom token for padding Llama + tokenizer.add_special_tokens({"pad_token": tokenizer.eos_token}) + return model, tokenizer + + +def create_and_prepare_trainer(model, tokenizer, dataset, args): + data = dataset.map(lambda samples: tokenizer(samples["quote"]), batched=True) + fsdp_training_args = fsdp_v2.get_fsdp_training_args(model) + + trainer = Trainer( + model=model, + train_dataset=data["train"], + args=TrainingArguments( + per_device_train_batch_size=args.per_device_train_batch_size, + num_train_epochs=args.num_train_epochs, + max_steps=args.max_steps, + output_dir=args.output_dir, + optim=args.optim, + logging_steps=1, + dataloader_drop_last=True, # Required by FSDP v2 and SPMD. + **fsdp_training_args, + ), + data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False), + ) + + return trainer + + +def parse_config() -> ScriptArguments: + import sys + + import yaml + + # Ensure a YAML file is provided as an argument + if len(sys.argv) != 2: + sys.exit(1) + + config_path = sys.argv[1] + + # Read the YAML file + with open(config_path, "r") as f: + config = yaml.safe_load(f) + + # Parse arguments using HfArgumentParser + parser = HfArgumentParser(ScriptArguments) + script_args = parser.parse_dict(config)[0] + return script_args + + +if __name__ == "__main__": + args = parse_config() + fsdp_v2.use_fsdp_v2() + dataset = load_dataset(args.dataset_name) + model, tokenizer = create_and_prepare_model(args) + trainer = create_and_prepare_trainer(model, tokenizer, dataset, args) + trainer.train() + if args.push_to_hub: + kwargs = { + "finetuned_from": args.model_name, + "dataset": args.dataset_name, + } + trainer.push_to_hub(**kwargs) diff --git a/mkdocs.yml b/mkdocs.yml index 2b8d2a029..59b3b03ff 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -235,6 +235,7 @@ nav: - TRL: examples/fine-tuning/trl/index.md - Accelerators: - AMD: examples/accelerators/amd/index.md + - TPU: examples/accelerators/tpu/index.md - LLMs: - Llama 3.1: examples/llms/llama31/index.md - Blog: