diff --git a/.env.example b/.env.example
new file mode 100644
index 000000000..f4e0e5128
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,3 @@
+INITIAL_PEERS=
+DEVICE=cuda
+MODEL=meta-llama/Meta-Llama-3-8B-Instruct
diff --git a/.github/workflows/check-style.yaml b/.github/workflows/check-style.yaml
index 60ea42b42..d878766d0 100644
--- a/.github/workflows/check-style.yaml
+++ b/.github/workflows/check-style.yaml
@@ -9,18 +9,19 @@ jobs:
   black:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
-      - uses: psf/black@stable
+      - uses: meta-introspector/checkout@main
+      - uses: meta-introspector/black@main
         with:
           options: "--check --diff"
           version: "22.3.0"
   isort:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
-      - uses: actions/setup-python@v3
+      - uses: meta-introspector/checkout@main
+      - uses: meta-introspector/setup-python@main
        with:
          python-version: 3.8
-      - uses: isort/isort-action@master
+      - uses: meta-introspector/isort-action@main
        with:
          isortVersion: "5.10.1"
+
diff --git a/.github/workflows/push-docker-image.yaml b/.github/workflows/push-docker-image.yaml
index 58fbb0030..88ae1294d 100644
--- a/.github/workflows/push-docker-image.yaml
+++ b/.github/workflows/push-docker-image.yaml
@@ -14,15 +14,15 @@ jobs:
 
     steps:
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: meta-introspector/checkout@main
 
       - name: Docker meta
         id: meta
-        uses: crazy-max/ghaction-docker-meta@v2
+        uses: meta-introspector/metadata-action@main
         with:
           # list of Docker images to use as base name for tags
           images: |
-            learningathome/petals
+            h4ckermike/petals
           # generate Docker tags based on the following events/attributes
           tags: |
             type=ref,event=branch
@@ -33,17 +33,17 @@ jobs:
 
       - name: Set up Docker Buildx
         id: buildx
-        uses: docker/setup-buildx-action@v1
+        uses: meta-introspector/setup-buildx-action@main
 
       - name: Login to Docker Hub
         if: github.event_name != 'pull_request'
-        uses: docker/login-action@v1
+        uses: meta-introspector/login-action@main
         with:
           username: ${{ secrets.DOCKER_HUB_USERNAME }}
           password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
 
       - name: Free disk space on Ubuntu runner
-        uses: kfir4444/free-disk-space@main
+        uses: meta-introspector/free-disk-space@main
         with:
           # found in: https://github.com/docker/build-push-action/issues/968
           tool-cache: false
@@ -55,7 +55,7 @@ jobs:
 
       - name: Build and push
         id: docker_build
-        uses: docker/build-push-action@v2
+        uses: meta-introspector/build-push-action@main
         with:
           context: .
           push: ${{ github.event_name != 'pull_request' }}
diff --git a/.github/workflows/run-tests-docker.yaml b/.github/workflows/run-tests-docker.yaml
new file mode 100644
index 000000000..6f6f00f18
--- /dev/null
+++ b/.github/workflows/run-tests-docker.yaml
@@ -0,0 +1,32 @@
+name: Tests in docker compose
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+
+jobs:
+  run-tests-in-compose:
+    # runs-on: ubuntu-latest
+    runs-on: self-hosted
+    timeout-minutes: 20
+    steps:
+      - name: Increase swap space
+        if: ${{ matrix.os == 'ubuntu' }}
+        uses: meta-introspector/set-swap-space@main
+        with:
+          swap-size-gb: 10
+      - name: Checkout
+        uses: meta-introspector/checkout@main
+
+      - name: Build the docker-compose stack
+        run: docker-compose -f docker-compose.yml up -d
+
+      - name: Check running containers
+        run: docker ps -a
+
+      - name: Check logs
+        run: docker logs health
+
+      - name: Tear down the docker-compose stack
+        run: docker-compose down
diff --git a/.github/workflows/run-tests.yaml b/.github/workflows/run-tests.yaml
index d6316d48b..2ccc72951 100644
--- a/.github/workflows/run-tests.yaml
+++ b/.github/workflows/run-tests.yaml
@@ -24,17 +24,17 @@ jobs:
     steps:
       - name: Increase swap space
         if: ${{ matrix.os == 'ubuntu' }}
-        uses: pierotofy/set-swap-space@master
+        uses: meta-introspector/set-swap-space@main
         with:
           swap-size-gb: 10
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: meta-introspector/checkout@main
       - name: Set up Python
-        uses: actions/setup-python@v3
+        uses: meta-introspector/setup-python@main
         with:
           python-version: ${{ matrix.python-version }}
       - name: Cache dependencies
-        uses: actions/cache@v3
+        uses: meta-introspector/cache@main
         with:
           path: ~/.cache/pip
           key: Key-v1-${{ matrix.python-version }}-${{ hashFiles('setup.cfg') }}
diff --git a/Dockerfile b/Dockerfile
index b1a267674..7ccf91f8e 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,5 +1,5 @@
 FROM nvcr.io/nvidia/cuda:11.0.3-cudnn8-devel-ubuntu20.04
-LABEL maintainer="bigscience-workshop"
+LABEL maintainer="meta-introspector"
 LABEL repository="petals"
 
 WORKDIR /home
@@ -24,8 +24,20 @@ RUN conda install python~=3.10.12 pip && \
 VOLUME /cache
 ENV PETALS_CACHE=/cache
 
-COPY . petals/
+ADD pip.freeze petals/pip.freeze
+RUN pip install --no-cache-dir -r petals/pip.freeze
+ADD pip2.freeze petals/pip2.freeze
+RUN pip install --no-cache-dir -r petals/pip2.freeze
+
+ADD tests petals/tests
+ADD src petals/src
+ADD LICENSE README.md pyproject.toml setup.cfg petals/
+
 RUN pip install --no-cache-dir -e petals
+#RUN pip install --no-cache-dir --upgrade transformers==4.34.0
 
 WORKDIR /home/petals/
-CMD bash
+
+RUN pip freeze > pip.freeze.new
+
+CMD python -m petals.cli.run_server --port 31331 --num_blocks=1 Maykeye/TinyLLama-v0
\ No newline at end of file
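For a local smoke test of the new Dockerfile, something along these lines should work; the `petals:dev` tag is arbitrary and the GPU flag assumes the NVIDIA container runtime is present (both are assumptions, not part of the patch):

```bash
# Build the image; pip.freeze / pip2.freeze install in separate cached layers
docker build -t petals:dev .

# Run the default CMD: serve one block of Maykeye/TinyLLama-v0 on port 31331
docker run --rm --gpus all -v petals-cache:/cache petals:dev
```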
diff --git a/README.md b/README.md
index 63449ae11..d880286c4 100644
--- a/README.md
+++ b/README.md
@@ -144,3 +144,20 @@ _arXiv preprint arXiv:2209.01188,_ 2022.
 
+
+## Setup
+
+Create a dedicated `petals` user, move the inference virtualenv into its home
+directory, then install and start the systemd unit:
+
+```bash
+sudo useradd petals
+sudo mkdir /home/petals
+sudo mv ~/.venv/ /home/petals/venv
+sudo chown -R petals: /home/petals/
+
+sudo cp etc/petals-inference.service /etc/systemd/system/
+sudo systemctl daemon-reload
+sudo systemctl restart petals-inference.service
+sudo systemctl status petals-inference.service -l
+```
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 000000000..dca6cb09b
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,35 @@
+services:
+  envmodel_gpu:
+    profiles: ["miner","gpu"]
+#    build: .
+    image: h4ckermike/petals:main
+    environment:
+      - MAX_DISK_SPACE=${MAX_DISK_SPACE}
+      - PUBLIC_NAME=${PUBLIC_NAME}
+      - MODEL=${MODEL}
+      - INITIAL_PEERS=${INITIAL_PEERS}
+      - DEVICE=${DEVICE}
+      - BLOCKS=${BLOCKS}
+      - PORT=${PORT}
+      - PUBLIC_IP=${PUBLIC_IP}
+#      - GOLOG_LOG_LEVEL=debug
+      - GOLOG_LOG_LEVEL=info
+      - GOLOG_FILE=/opt/cillium/var/log/p2pd_miner.log
+
+    command: python -m petals.cli.run_server --num_blocks=$BLOCKS $MODEL --initial_peers $INITIAL_PEERS --device=$DEVICE --public_name $PUBLIC_NAME --max_disk_space $MAX_DISK_SPACE --public_ip=${PUBLIC_IP} --port ${PORT} --identity_path /cache/bootstrap1.id
+    volumes:
+      - petals-cache-backbone:/cache
+      - log:/opt/cillium/var/log
+
+    restart: always
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+
+volumes:
+  petals-cache-backbone:
+  log:
diff --git a/envs/cpu/is1/env.txt b/envs/cpu/is1/env.txt
new file mode 100644
index 000000000..67a88f2cb
--- /dev/null
+++ b/envs/cpu/is1/env.txt
@@ -0,0 +1,2 @@
+INITIAL_PEERS=/ip4/172.17.0.1/tcp/8099/p2p/QmfVvYv3w3EqpKGYG5FCcER9bFgoGLCUvXDUJsZAgSDw3N
+DEVICE=cpu
diff --git a/envs/dht1.cillium.dev.compute.agentartificial.com.txt b/envs/dht1.cillium.dev.compute.agentartificial.com.txt
new file mode 100644
index 000000000..32a0d303c
--- /dev/null
+++ b/envs/dht1.cillium.dev.compute.agentartificial.com.txt
@@ -0,0 +1,9 @@
+INITIAL_PEERS=/dns/dht1.cillium.dev.compute.agentartificial.com/tcp/8008/p2p/QmYUro5QJx3YvgC4A9UBXL3ESdb3wSHXZzqUL19Fmy5Gsp
+DEVICE=cuda
+PUBLIC_NAME=NameNotConfigured
+MAX_DISK_SPACE=30GB
+BLOCKS=3
+#BLOCKS=33
+MODEL=VAGOsolutions/SauerkrautLM-Mixtral-8x7B-Instruct
+PORT=30331
+PUBLIC_IP=FIXME
diff --git a/envs/dht1.cillium.mixed.txt b/envs/dht1.cillium.mixed.txt
new file mode 100644
index 000000000..01288a9a1
--- /dev/null
+++ b/envs/dht1.cillium.mixed.txt
@@ -0,0 +1,7 @@
+INITIAL_PEERS=/dns/dht1.cillium.dev.compute.agentartificial.com/tcp/8008/p2p/QmYUro5QJx3YvgC4A9UBXL3ESdb3wSHXZzqUL19Fmy5Gsp:/dns/dht1.cillium.prod.compute.agentartificial.com/tcp/8008/p2p/QmRuzeEHPohDR4BGvArrBrqUNC2YPvTc1m5iwTXefdKra8
+DEVICE=cuda
+#MODEL=Maykeye/TinyLLama-v0
+MODEL=VAGOsolutions/SauerkrautLM-Mixtral-8x7B
+PUBLIC_NAME=NameNotConfigured
+MAX_DISK_SPACE=30GB
+BLOCKS=1
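Before launching a stack with one of these env files, it can help to confirm that every variable the compose file references is actually filled in. A minimal check, assuming Compose v2 (`config` only renders the interpolated YAML; it starts nothing):

```bash
# Unset variables render as empty strings in the output
docker compose --profile miner --env-file envs/dht1.cillium.mixed.txt config
```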
diff --git a/envs/dht1.cillium.prod.compute.agentartificial.com.1.txt b/envs/dht1.cillium.prod.compute.agentartificial.com.1.txt
new file mode 100644
index 000000000..7ae1551de
--- /dev/null
+++ b/envs/dht1.cillium.prod.compute.agentartificial.com.1.txt
@@ -0,0 +1,8 @@
+INITIAL_PEERS=/dns/dht1.cillium.prod.compute.agentartificial.com/tcp/8008/p2p/QmRuzeEHPohDR4BGvArrBrqUNC2YPvTc1m5iwTXefdKra8
+DEVICE=cuda
+#MODEL=Maykeye/TinyLLama-v0
+MODEL=VAGOsolutions/SauerkrautLM-Mixtral-8x7B
+PUBLIC_NAME=NameNotConfigured
+MAX_DISK_SPACE=30GB
+BLOCKS=1
+
diff --git a/envs/dht1.cillium.prod.compute.agentartificial.com.txt b/envs/dht1.cillium.prod.compute.agentartificial.com.txt
new file mode 100644
index 000000000..eb1972556
--- /dev/null
+++ b/envs/dht1.cillium.prod.compute.agentartificial.com.txt
@@ -0,0 +1,6 @@
+INITIAL_PEERS=/dns/dht1.cillium.prod.compute.agentartificial.com/tcp/8008/p2p/QmRuzeEHPohDR4BGvArrBrqUNC2YPvTc1m5iwTXefdKra8
+DEVICE=cuda
+PUBLIC_NAME=NameNotConfigured
+MAX_DISK_SPACE=30GB
+#BLOCKS=40
+BLOCKS=2
diff --git a/envs/gpu/h100.txt b/envs/gpu/h100.txt
new file mode 100644
index 000000000..dce85093d
--- /dev/null
+++ b/envs/gpu/h100.txt
@@ -0,0 +1,2 @@
+INITIAL_PEERS=/ip4/216.81.245.26/tcp/8008/p2p/QmR4PcZvHg414Q2HNEaQZLiu69HD4Vs17hcwDgq8qJdJq2
+DEVICE=cuda
diff --git a/envs/gpu/h100/dht1.cillium.dev.compute.agentartificial.com.txt b/envs/gpu/h100/dht1.cillium.dev.compute.agentartificial.com.txt
new file mode 100644
index 000000000..be8f62210
--- /dev/null
+++ b/envs/gpu/h100/dht1.cillium.dev.compute.agentartificial.com.txt
@@ -0,0 +1,7 @@
+INITIAL_PEERS=/dns/dht1.cillium.dev.compute.agentartificial.com/tcp/8008/p2p/Qmb3skfrki1PR8ww6nxvoGm51F5imK3e1DPMZgtay6ofE2
+DEVICE=cuda
+MODEL=VAGOsolutions/SauerkrautLM-Mixtral-8x7B-Instruct
+BLOCKS=100
+#MODEL=Maykeye/TinyLLama-v0
+#MODEL=SanjiWatsuki/TinyMixtral-32x248M
+PORT=31332
diff --git a/envs/gpu/h100/peers.txt b/envs/gpu/h100/peers.txt
new file mode 100644
index 000000000..fa750b647
--- /dev/null
+++ b/envs/gpu/h100/peers.txt
@@ -0,0 +1 @@
+INITIAL_PEERS=/dns/dht1.cillium.dev.compute.agentartificial.com/tcp/8008/p2p/Qmb3skfrki1PR8ww6nxvoGm51F5imK3e1DPMZgtay6ofE2
diff --git a/envs/gpu/is1/env.txt b/envs/gpu/is1/env.txt
new file mode 100644
index 000000000..5cfce73ed
--- /dev/null
+++ b/envs/gpu/is1/env.txt
@@ -0,0 +1,5 @@
+INITIAL_PEERS=/ip4/172.17.0.1/tcp/8099/p2p/QmfVvYv3w3EqpKGYG5FCcER9bFgoGLCUvXDUJsZAgSDw3N
+#PJRT_DEVICE=TPU
+DEVICE=cuda
+#DEVICE=cpu
+#DEVICE=tpux
diff --git a/envs/tpu/v3-0/env.txt b/envs/tpu/v3-0/env.txt
new file mode 100644
index 000000000..c0c3c0768
--- /dev/null
+++ b/envs/tpu/v3-0/env.txt
@@ -0,0 +1,4 @@
+INITIAL_PEERS=/ip4/10.164.0.22/tcp/8099/p2p/QmRVmvteSpVKKeNDSaV7Ezy3HNA4bnNfE2EbzDJVFDEwAa
+#PJRT_DEVICE=TPU
+#DEVICE=xla
+DEVICE=cpu
\ No newline at end of file
diff --git a/etc/petals-inference.service b/etc/petals-inference.service
new file mode 100644
index 000000000..ae08b6f40
--- /dev/null
+++ b/etc/petals-inference.service
@@ -0,0 +1,11 @@
+[Unit]
+Description=Petals Inference
+
+[Service]
+User=petals
+Group=petals
+Environment=PJRT_DEVICE=TPU
+ExecStart=/home/petals/venv/bin/python -m petals.cli.run_server --port 31330 petals-team/StableBeluga2 --device xla --num_blocks=4
+
+[Install]
+WantedBy=multi-user.target
diff --git a/get_peersl.sh b/get_peersl.sh
new file mode 100644
index 000000000..b14ee899a
--- /dev/null
+++ b/get_peersl.sh
@@ -0,0 +1 @@
+docker logs petals-backbone-1 2>&1 | grep initial_peers | cut "-d " -f18- | sort -u > peers.txt
diff --git a/health.env b/health.env
new file mode 100644
index 000000000..20e15a404
--- /dev/null
+++ b/health.env
@@ -0,0 +1,3 @@
+INITIAL_PEERS=/ip4/10.164.0.22/tcp/8099/p2p/QmRVmvteSpVKKeNDSaV7Ezy3HNA4bnNfE2EbzDJVFDEwAa
+PJRT_DEVICE=TPU
+DEVICE=xla
\ No newline at end of file
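Assuming the unit file is installed as in the README setup section above, standard systemd commands enable it at boot and expose its logs (a sketch, not part of the patch):

```bash
sudo cp etc/petals-inference.service /etc/systemd/system/
sudo systemctl daemon-reload
sudo systemctl enable --now petals-inference.service
journalctl -u petals-inference.service -f   # follow the server logs
```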
diff --git a/pip.freeze b/pip.freeze
new file mode 100644
index 000000000..50199037f
--- /dev/null
+++ b/pip.freeze
@@ -0,0 +1,71 @@
+accelerate==0.29.2
+async-timeout==4.0.3
+base58==2.1.1
+bitsandbytes==0.41.1
+certifi==2024.2.2
+cffi==1.16.0
+charset-normalizer==3.3.2
+ConfigArgParse==1.7
+cpufeature==0.2.1
+cryptography==42.0.5
+
+Dijkstar==2.6.0
+filelock==3.13.4
+fsspec==2024.3.1
+grpcio==1.62.1
+grpcio-tools==1.62.1
+humanfriendly==10.0
+idna==3.7
+Jinja2==3.1.3
+jsonpointer==2.1
+
+MarkupSafe==2.1.5
+mpmath==1.3.0
+msgpack==1.0.8
+multiaddr==0.0.9
+netaddr==1.2.1
+networkx==3.3
+numpy==1.26.4
+nvidia-cublas-cu12==12.1.3.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cudnn-cu12==8.9.2.26
+nvidia-cufft-cu12==11.0.2.54
+nvidia-curand-cu12==10.3.2.106
+nvidia-cusolver-cu12==11.4.5.107
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-nccl-cu12==2.19.3
+nvidia-nvjitlink-cu12==12.4.127
+nvidia-nvtx-cu12==12.1.105
+
+packaging==24.0
+peft==0.5.0
+prefetch-generator==1.0.3
+protobuf==4.25.3
+psutil==5.9.8
+pycparser==2.22
+pydantic==1.10.15
+pymultihash==0.8.2
+PyYAML==6.0.1
+regex==2023.12.25
+requests==2.31.0
+
+safetensors==0.4.3
+scipy==1.13.0
+sentencepiece==0.2.0
+six==1.16.0
+sortedcontainers==2.4.0
+speedtest-cli==2.1.3
+sympy==1.12
+tensor-parallel==1.0.23
+
+tqdm==4.66.2
+triton==2.2.0
+typing_extensions==4.11.0
+urllib3==2.2.1
+uvloop==0.19.0
+varint==1.0.2
+
+hivemind==1.1.10.post2
+torch==2.2.2
diff --git a/pip2.freeze b/pip2.freeze
new file mode 100644
index 000000000..f473bed0a
--- /dev/null
+++ b/pip2.freeze
@@ -0,0 +1,8 @@
+#transformers==4.38.2
+transformers==4.34.0
+
+#tokenizers==0.15.2
+tokenizers>=0.14,<0.15
+
+huggingface_hub>=0.16.4,<0.18
+#huggingface-hub==0.22.2
diff --git a/pyproject.toml b/pyproject.toml
index cfc991c07..6f1475b01 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
 [tool.black]
 line-length = 120
-required-version = "22.3.0"
+required-version = "24.3.0"
 
 [tool.isort]
 profile = "black"
diff --git a/run.sh b/run.sh
new file mode 100755
index 000000000..15bff4390
--- /dev/null
+++ b/run.sh
@@ -0,0 +1 @@
+PJRT_DEVICE=TPU python -m petals.cli.run_server --port 31330 petals-team/StableBeluga2 --device xla --num_blocks=10
diff --git a/run2.sh b/run2.sh
new file mode 100644
index 000000000..23c984d57
--- /dev/null
+++ b/run2.sh
@@ -0,0 +1 @@
+docker run -d --net host --ipc host --volume petals-cache-backbone:/cache --name backbone --rm learningathome/petals:main python -m petals.cli.run_dht --host_maddrs /ip4/0.0.0.0/tcp/8099 --identity_path bootstrap1.id
diff --git a/setup.sh b/setup.sh
new file mode 100755
index 000000000..b9b40da3c
--- /dev/null
+++ b/setup.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+set -e
+
+docker compose --profile miner --env-file envs/dht1.cillium.dev.compute.agentartificial.com.txt up -d
+#sudo docker compose --profile miner --env-file envs/gpu/h100/dht1.cillium.dev.compute.agentartificial.com.txt up -d
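If the pins ever need regenerating, one plausible workflow (an assumption, not prescribed by this patch) is to rebuild pip.freeze from a known-good environment while keeping the hand-maintained transformers/tokenizers pins in pip2.freeze, so the large pip.freeze Docker layer stays cacheable:

```bash
# Exclude the packages that pip2.freeze pins separately
pip freeze | grep -vE '^(transformers|tokenizers|huggingface.hub)' > pip.freeze

# Check that both files resolve together (requires pip >= 22.2)
pip install --dry-run -r pip.freeze -r pip2.freeze
```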
diff --git a/src/petals/__init__.py b/src/petals/__init__.py
index ccc560e0c..68120bfc4 100644
--- a/src/petals/__init__.py
+++ b/src/petals/__init__.py
@@ -20,10 +20,10 @@ __version__ = "2.3.0.dev2"
 
 
-if not os.getenv("PETALS_IGNORE_DEPENDENCY_VERSION"):
-    assert (
-        version.parse("4.38.2") <= version.parse(transformers.__version__) < version.parse("4.39.0")
-    ), "Please install a proper transformers version: pip install transformers>=4.37.1,<4.39.0"
+# if not os.getenv("PETALS_IGNORE_DEPENDENCY_VERSION"):
+#     assert (
+#         version.parse("4.38.2") <= version.parse(transformers.__version__) < version.parse("4.39.0")
+#     ), "Please install a proper transformers version: pip install transformers>=4.37.1,<4.39.0"
 
 
 def _override_bfloat16_mode_default():
diff --git a/src/petals/client/ptune.py b/src/petals/client/ptune.py
index f3995d68a..ecd3543e8 100644
--- a/src/petals/client/ptune.py
+++ b/src/petals/client/ptune.py
@@ -51,7 +51,7 @@ def get_prompt(self, batch_size):
             batch_size,
             self.pre_seq_len,
             self.config.num_hidden_layers,
-            self.config.hidden_size
+            self.config.hidden_size,  # TODO: should be num_hidden_layers - 1
         )
         intermediate_prompts = intermediate_prompts.permute([2, 0, 1, 3])
diff --git a/src/petals/client/remote_forward_backward.py b/src/petals/client/remote_forward_backward.py
index 44abe2686..d61106702 100644
--- a/src/petals/client/remote_forward_backward.py
+++ b/src/petals/client/remote_forward_backward.py
@@ -1,6 +1,7 @@
 """
 Utility functions that call RPC forward or backward on a single remote server
 """
+
 import asyncio
 from typing import Iterable, List, Optional, Sequence, Tuple
diff --git a/src/petals/client/routing/spending_policy.py b/src/petals/client/routing/spending_policy.py
index 0af3db7fd..f4eddba76 100644
--- a/src/petals/client/routing/spending_policy.py
+++ b/src/petals/client/routing/spending_policy.py
@@ -3,6 +3,7 @@
 The intent is to let Petals participants earn points by helping others while idle (e.g. at night), then use these
 points to run their own compute experiments faster. See Section 4 of https://arxiv.org/abs/2209.01188 for discussion.
 """
+
 from abc import ABC, abstractmethod
diff --git a/src/petals/client/sequential_autograd.py b/src/petals/client/sequential_autograd.py
index 9d965d2a5..c95f0a701 100644
--- a/src/petals/client/sequential_autograd.py
+++ b/src/petals/client/sequential_autograd.py
@@ -1,6 +1,7 @@
 """
 A PyTorch autograd function that runs forward/backward on a sequence of remote servers in a fault-tolerant manner
 """
+
 import asyncio
 import itertools
 from collections import deque
diff --git a/src/petals/constants.py b/src/petals/constants.py
index d307b8140..3425ea6b8 100644
--- a/src/petals/constants.py
+++ b/src/petals/constants.py
@@ -2,17 +2,10 @@
 
 PUBLIC_INITIAL_PEERS = [
     # IPv4 DNS addresses
-    "/dns/bootstrap1.petals.dev/tcp/31337/p2p/QmedTaZXmULqwspJXz44SsPZyTNKxhnnFvYRajfH7MGhCY",
-    "/dns/bootstrap2.petals.dev/tcp/31338/p2p/QmQGTqmM7NKjV6ggU1ZCap8zWiyKR89RViDXiqehSiCpY5",
-    # IPv6 DNS addresses
-    "/dns6/bootstrap1.petals.dev/tcp/31337/p2p/QmedTaZXmULqwspJXz44SsPZyTNKxhnnFvYRajfH7MGhCY",
-    "/dns6/bootstrap2.petals.dev/tcp/31338/p2p/QmQGTqmM7NKjV6ggU1ZCap8zWiyKR89RViDXiqehSiCpY5",
-    # Reserved IPs
-    "/ip4/159.89.214.152/tcp/31337/p2p/QmedTaZXmULqwspJXz44SsPZyTNKxhnnFvYRajfH7MGhCY",
-    "/ip4/159.203.156.48/tcp/31338/p2p/QmQGTqmM7NKjV6ggU1ZCap8zWiyKR89RViDXiqehSiCpY5",
+    "/dns/dht1.cillium.dev.compute.agentartificial.com/tcp/8008/p2p/Qmb3skfrki1PR8ww6nxvoGm51F5imK3e1DPMZgtay6ofE2"
 ]
 
 # The reachability API is currently used only when connecting to the public swarm
-REACHABILITY_API_URL = "https://health.petals.dev"
+REACHABILITY_API_URL = "https://health.cillium.dev.compute.agentartificial.com"
 
 DTYPE_MAP = dict(bfloat16=torch.bfloat16, float16=torch.float16, float32=torch.float32, auto="auto")
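The disabled guard above already had an escape hatch: the original code skips the check when PETALS_IGNORE_DEPENDENCY_VERSION is set. If the assert is ever restored, the transformers==4.34.0 pin from pip2.freeze can still be used like this (the model choice is illustrative):

```bash
PETALS_IGNORE_DEPENDENCY_VERSION=1 python -m petals.cli.run_server \
    --num_blocks=1 Maykeye/TinyLLama-v0
```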
diff --git a/src/petals/models/bloom/block.py b/src/petals/models/bloom/block.py
index 439b9ca10..d7431860c 100644
--- a/src/petals/models/bloom/block.py
+++ b/src/petals/models/bloom/block.py
@@ -3,6 +3,7 @@
 Based on https://github.com/huggingface/transformers/commit/ca2a55e9dfb245527b5e1c954fec6ffbb7aef07b
 See commit history for authorship.
 """
+
 from typing import Optional, Tuple
 
 import torch
diff --git a/src/petals/models/falcon/block.py b/src/petals/models/falcon/block.py
index a510abaa1..761bd5dd1 100644
--- a/src/petals/models/falcon/block.py
+++ b/src/petals/models/falcon/block.py
@@ -3,6 +3,7 @@
 Based on https://github.com/huggingface/transformers/blob/main/src/transformers/models/falcon/modeling_falcon.py
 See commit history for authorship.
 """
+
 import math
 from functools import partial
 from typing import Optional, Tuple
diff --git a/src/petals/models/llama/block.py b/src/petals/models/llama/block.py
index 2eb8f731f..bd6c8c86d 100644
--- a/src/petals/models/llama/block.py
+++ b/src/petals/models/llama/block.py
@@ -3,6 +3,7 @@
 Based on https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py
 See commit history for authorship.
 """
+
 import math
 from typing import Optional, Tuple
diff --git a/src/petals/server/block_functions.py b/src/petals/server/block_functions.py
index a79f05c94..4c3cf9a02 100644
--- a/src/petals/server/block_functions.py
+++ b/src/petals/server/block_functions.py
@@ -1,6 +1,7 @@
 """
 This module implements server-side computations on served blocks: forward, backward and inference; used by handler
 """
+
 from __future__ import annotations
 
 from typing import Any, AsyncIterator, Dict, Optional, Sequence, Tuple, Union
diff --git a/src/petals/server/from_pretrained.py b/src/petals/server/from_pretrained.py
index 4a3b15077..bad0b2ef0 100644
--- a/src/petals/server/from_pretrained.py
+++ b/src/petals/server/from_pretrained.py
@@ -6,6 +6,7 @@
 - fetch the weights over IPoAC, using a fleet of trained pigeons ( http://www.faqs.org/rfcs/rfc1149.html )
 """
+
 import json
 import time
 from contextlib import suppress
""" + import asyncio import contextlib import ctypes diff --git a/src/petals/server/throughput.py b/src/petals/server/throughput.py index d9471790f..c30d2877a 100644 --- a/src/petals/server/throughput.py +++ b/src/petals/server/throughput.py @@ -206,7 +206,7 @@ def measure_compute_rps( block = block.to(dtype) block = convert_block(block, 0, config, tensor_parallel_devices, device, quant_type=quant_type, freeze=True) - cache = (DUMMY_KEY_PAST.to(dtype), DUMMY_KEY_PAST.to(dtype)) + cache = (DUMMY_KEY_PAST.to(dtype=dtype, device=device), DUMMY_KEY_PAST.to(dtype=dtype, device=device)) elapsed = 0 dummy_input = torch.randn(1, n_tokens, config.hidden_size, device=device, dtype=dtype) diff --git a/src/petals/utils/auto_config.py b/src/petals/utils/auto_config.py index 0cec83d87..6043c7ba2 100644 --- a/src/petals/utils/auto_config.py +++ b/src/petals/utils/auto_config.py @@ -40,6 +40,9 @@ def from_pretrained(cls, model_name_or_path: Union[str, os.PathLike, None], *arg ): kwargs["use_auth_token"] = True + kwargs["trust_remote_code"] = True + # trust_remote_code=True + config = AutoConfig.from_pretrained(model_name_or_path, *args, **kwargs) if config.model_type not in _CLASS_MAPPING: raise ValueError(f"Petals does not support model type {config.model_type}") diff --git a/src/petals/utils/convert_block.py b/src/petals/utils/convert_block.py index 94d3e29f3..26d6b7dd3 100644 --- a/src/petals/utils/convert_block.py +++ b/src/petals/utils/convert_block.py @@ -1,6 +1,7 @@ """ Tools for converting transformer blocks, applying quantization and/or tensor parallelism """ + import re from enum import Enum from typing import Optional, Sequence diff --git a/src/petals/utils/dht.py b/src/petals/utils/dht.py index 4faf74aa7..357cd98cc 100644 --- a/src/petals/utils/dht.py +++ b/src/petals/utils/dht.py @@ -1,6 +1,7 @@ """ Utilities for declaring and retrieving active model layers using a shared DHT. """ + from __future__ import annotations import math diff --git a/up.sh b/up.sh new file mode 100644 index 000000000..8ed68d13e --- /dev/null +++ b/up.sh @@ -0,0 +1 @@ +sudo docker compose --profile core --env-file ./envs/gpu/h100.txt up health