diff --git a/.env.example b/.env.example
new file mode 100644
index 000000000..f4e0e5128
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,3 @@
+INITIAL_PEERS=
+DEVICE=cuda
+MODEL=meta-llama/Meta-Llama-3-8B-Instruct
diff --git a/.github/workflows/check-style.yaml b/.github/workflows/check-style.yaml
index 60ea42b42..d878766d0 100644
--- a/.github/workflows/check-style.yaml
+++ b/.github/workflows/check-style.yaml
@@ -9,18 +9,19 @@ jobs:
black:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
- - uses: psf/black@stable
+ - uses: meta-introspector/checkout@main
+ - uses: meta-introspector/black@main
with:
options: "--check --diff"
-          version: "22.3.0"
+          version: "24.3.0"
isort:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
- - uses: actions/setup-python@v3
+ - uses: meta-introspector/checkout@main
+ - uses: meta-introspector/setup-python@main
with:
python-version: 3.8
- - uses: isort/isort-action@master
+ - uses: meta-introspector/isort-action@main
with:
isortVersion: "5.10.1"
+
diff --git a/.github/workflows/push-docker-image.yaml b/.github/workflows/push-docker-image.yaml
index 58fbb0030..88ae1294d 100644
--- a/.github/workflows/push-docker-image.yaml
+++ b/.github/workflows/push-docker-image.yaml
@@ -14,15 +14,15 @@ jobs:
steps:
- name: Checkout
- uses: actions/checkout@v3
+ uses: meta-introspector/checkout@main
- name: Docker meta
id: meta
- uses: crazy-max/ghaction-docker-meta@v2
+ uses: meta-introspector/metadata-action@main
with:
# list of Docker images to use as base name for tags
images: |
- learningathome/petals
+ h4ckermike/petals
# generate Docker tags based on the following events/attributes
tags: |
type=ref,event=branch
@@ -33,17 +33,17 @@ jobs:
- name: Set up Docker Buildx
id: buildx
- uses: docker/setup-buildx-action@v1
+ uses: meta-introspector/setup-buildx-action@main
- name: Login to Docker Hub
if: github.event_name != 'pull_request'
- uses: docker/login-action@v1
+ uses: meta-introspector/login-action@main
with:
username: ${{ secrets.DOCKER_HUB_USERNAME }}
password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
- name: Free disk space on Ubuntu runner
- uses: kfir4444/free-disk-space@main
+ uses: meta-introspector/free-disk-space@main
with:
# found in: https://github.com/docker/build-push-action/issues/968
tool-cache: false
@@ -55,7 +55,7 @@ jobs:
- name: Build and push
id: docker_build
- uses: docker/build-push-action@v2
+ uses: meta-introspector/build-push-action@main
with:
context: .
push: ${{ github.event_name != 'pull_request' }}
diff --git a/.github/workflows/run-tests-docker.yaml b/.github/workflows/run-tests-docker.yaml
new file mode 100644
index 000000000..6f6f00f18
--- /dev/null
+++ b/.github/workflows/run-tests-docker.yaml
@@ -0,0 +1,36 @@
+name: Tests in docker compose
+
+on:
+ push:
+ branches: [ main ]
+ pull_request:
+
+jobs:
+ run-tests-in-compose:
+ # runs-on: ubuntu-latest
+ runs-on: self-hosted
+ timeout-minutes: 20
+ steps:
+ - name: Increase swap space
+        if: ${{ runner.os == 'Linux' }}
+ uses: meta-introspector/set-swap-space@main
+ with:
+ swap-size-gb: 10
+ - name: Checkout
+ uses: meta-introspector/checkout@main
+
+ - name: Build the docker-compose stack
+ run: docker-compose -f docker-compose.yml up -d
+
+ - name: Check running containers
+ run: docker ps -a
+
+ - name: Check logs
+        run: docker-compose logs
+
+      - name: Tear down the docker-compose stack
+ run: docker-compose down
+
+
+
+
diff --git a/.github/workflows/run-tests.yaml b/.github/workflows/run-tests.yaml
index d6316d48b..2ccc72951 100644
--- a/.github/workflows/run-tests.yaml
+++ b/.github/workflows/run-tests.yaml
@@ -24,17 +24,17 @@ jobs:
steps:
- name: Increase swap space
if: ${{ matrix.os == 'ubuntu' }}
- uses: pierotofy/set-swap-space@master
+ uses: meta-introspector/set-swap-space@main
with:
swap-size-gb: 10
- name: Checkout
- uses: actions/checkout@v3
+ uses: meta-introspector/checkout@main
- name: Set up Python
- uses: actions/setup-python@v3
+ uses: meta-introspector/setup-python@main
with:
python-version: ${{ matrix.python-version }}
- name: Cache dependencies
- uses: actions/cache@v3
+ uses: meta-introspector/cache@main
with:
path: ~/.cache/pip
key: Key-v1-${{ matrix.python-version }}-${{ hashFiles('setup.cfg') }}
diff --git a/Dockerfile b/Dockerfile
index b1a267674..7ccf91f8e 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,5 +1,5 @@
FROM nvcr.io/nvidia/cuda:11.0.3-cudnn8-devel-ubuntu20.04
-LABEL maintainer="bigscience-workshop"
+LABEL maintainer="meta-introspector"
LABEL repository="petals"
WORKDIR /home
@@ -24,8 +24,22 @@ RUN conda install python~=3.10.12 pip && \
VOLUME /cache
ENV PETALS_CACHE=/cache
-COPY . petals/
+ADD pip.freeze petals/pip.freeze
+RUN pip install --no-cache-dir -r petals/pip.freeze
+ADD pip2.freeze petals/pip2.freeze
+RUN pip install --no-cache-dir -r petals/pip2.freeze
+
+ADD tests petals/tests
+ADD src petals/src
+ADD LICENSE README.md pyproject.toml setup.cfg petals/
+
RUN pip install --no-cache-dir -e petals
+RUN pip freeze > pip.freeze.new
+#RUN pip install --no-cache-dir --upgrade transformers==4.34.0
+
WORKDIR /home/petals/
-CMD bash
+
+RUN pip freeze > pip.freeze.new
+
+CMD python -m petals.cli.run_server --port 31331 --num_blocks=1 Maykeye/TinyLLama-v0
\ No newline at end of file
diff --git a/README.md b/README.md
index 63449ae11..d880286c4 100644
--- a/README.md
+++ b/README.md
@@ -144,3 +144,25 @@ _arXiv preprint arXiv:2209.01188,_ 2022.
+
+
+# setup
+
+
+ 1623 sudo cp petals-inference.service /etc/systemd/system/
+ 1634 sudo systemctl daemon-reload
+ 1635 sudo systemctl status petals-inference.service -l
+ 1636 sudo systemctl restart petals-inference.service -l
+
+ 1639 sudo useradd petals
+ 1640 sudo mkdir /home/petals
+ 1641 sudo chown petals: /home/petals/
+ 1643 sudo cp -r ~/.venv/ /home/petals/venv
+ 1644 sudo rm -rf /home/petals/venv
+ 1658 sudo mv ~/.venv/ /home/petals/venv
+ 1659 sudo chown petals: /home/petals/
+
+1670 sudo systemctl status petals-inference.service -l
+ 1674 sudo systemctl restart petals-inference.service -l
+ 1675 sudo systemctl status petals-inference.service -l
+
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 000000000..dca6cb09b
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,35 @@
+services:
+  envmodel_gpu:
+    profiles: ["miner","gpu"]
+# build: .
+    image: h4ckermike/petals:main
+    environment:
+      - MAX_DISK_SPACE=${MAX_DISK_SPACE}
+      - PUBLIC_NAME=${PUBLIC_NAME}
+      - MODEL=${MODEL}
+      - INITIAL_PEERS=${INITIAL_PEERS}
+      - DEVICE=${DEVICE}
+      - BLOCKS=${BLOCKS}
+      - PORT=${PORT}
+      - PUBLIC_IP=${PUBLIC_IP}
+# - GOLOG_LOG_LEVEL=debug
+      - GOLOG_LOG_LEVEL=info
+      - GOLOG_FILE=/opt/cillium/var/log/p2pd_miner.log
+
+    command: python -m petals.cli.run_server --num_blocks=$BLOCKS $MODEL --initial_peers $INITIAL_PEERS --device=$DEVICE --public_name $PUBLIC_NAME --max_disk_space $MAX_DISK_SPACE --public_ip=${PUBLIC_IP} --port ${PORT} --identity_path /cache/bootstrap1.id
+    volumes:
+      - petals-cache-backbone:/cache
+      - log:/opt/cillium/var/log
+
+    restart: always
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+
+volumes:
+  petals-cache-backbone:
+  log:
diff --git a/envs/cpu/is1/env.txt b/envs/cpu/is1/env.txt
new file mode 100644
index 000000000..67a88f2cb
--- /dev/null
+++ b/envs/cpu/is1/env.txt
@@ -0,0 +1,2 @@
+INITIAL_PEERS=/ip4/172.17.0.1/tcp/8099/p2p/QmfVvYv3w3EqpKGYG5FCcER9bFgoGLCUvXDUJsZAgSDw3N
+DEVICE=cpu
diff --git a/envs/dht1.cillium.dev.compute.agentartificial.com.txt b/envs/dht1.cillium.dev.compute.agentartificial.com.txt
new file mode 100644
index 000000000..32a0d303c
--- /dev/null
+++ b/envs/dht1.cillium.dev.compute.agentartificial.com.txt
@@ -0,0 +1,9 @@
+INITIAL_PEERS=/dns/dht1.cillium.dev.compute.agentartificial.com/tcp/8008/p2p/QmYUro5QJx3YvgC4A9UBXL3ESdb3wSHXZzqUL19Fmy5Gsp
+DEVICE=cuda
+PUBLIC_NAME=NameNotConfigured
+MAX_DISK_SPACE=30GB
+BLOCKS=3
+#BLOCKS=33
+MODEL=VAGOsolutions/SauerkrautLM-Mixtral-8x7B-Instruct
+PORT=30331
+PUBLIC_IP=FIXME
diff --git a/envs/dht1.cillium.mixed.txt b/envs/dht1.cillium.mixed.txt
new file mode 100644
index 000000000..01288a9a1
--- /dev/null
+++ b/envs/dht1.cillium.mixed.txt
@@ -0,0 +1,7 @@
+INITIAL_PEERS=/dns/dht1.cillium.dev.compute.agentartificial.com/tcp/8008/p2p/QmYUro5QJx3YvgC4A9UBXL3ESdb3wSHXZzqUL19Fmy5Gsp:/dns/dht1.cillium.prod.compute.agentartificial.com/tcp/8008/p2p/QmRuzeEHPohDR4BGvArrBrqUNC2YPvTc1m5iwTXefdKra8
+DEVICE=cuda
+#MODEL=Maykeye/TinyLLama-v0
+MODEL=VAGOsolutions/SauerkrautLM-Mixtral-8x7B
+PUBLIC_NAME=NameNotConfigured
+MAX_DISK_SPACE=30GB
+BLOCKS=1
diff --git a/envs/dht1.cillium.prod.compute.agentartificial.com.1.txt b/envs/dht1.cillium.prod.compute.agentartificial.com.1.txt
new file mode 100644
index 000000000..7ae1551de
--- /dev/null
+++ b/envs/dht1.cillium.prod.compute.agentartificial.com.1.txt
@@ -0,0 +1,8 @@
+INITIAL_PEERS=/dns/dht1.cillium.prod.compute.agentartificial.com/tcp/8008/p2p/QmRuzeEHPohDR4BGvArrBrqUNC2YPvTc1m5iwTXefdKra8
+DEVICE=cuda
+#MODEL=Maykeye/TinyLLama-v0
+MODEL=VAGOsolutions/SauerkrautLM-Mixtral-8x7B
+PUBLIC_NAME=NameNotConfigured
+MAX_DISK_SPACE=30GB
+BLOCKS=1
+
diff --git a/envs/dht1.cillium.prod.compute.agentartificial.com.txt b/envs/dht1.cillium.prod.compute.agentartificial.com.txt
new file mode 100644
index 000000000..eb1972556
--- /dev/null
+++ b/envs/dht1.cillium.prod.compute.agentartificial.com.txt
@@ -0,0 +1,7 @@
+INITIAL_PEERS=/dns/dht1.cillium.prod.compute.agentartificial.com/tcp/8008/p2p/QmRuzeEHPohDR4BGvArrBrqUNC2YPvTc1m5iwTXefdKra8
+DEVICE=cuda
+PUBLIC_NAME=NameNotConfigured
+#MAX_DISK_SPACE=300GB
+#BLOCKS=40
+BLOCKS=2
+MAX_DISK_SPACE=30GB
diff --git a/envs/gpu/h100.txt b/envs/gpu/h100.txt
new file mode 100644
index 000000000..dce85093d
--- /dev/null
+++ b/envs/gpu/h100.txt
@@ -0,0 +1,2 @@
+INITIAL_PEERS=/ip4/216.81.245.26/tcp/8008/p2p/QmR4PcZvHg414Q2HNEaQZLiu69HD4Vs17hcwDgq8qJdJq2
+DEVICE=cuda
diff --git a/envs/gpu/h100/dht1.cillium.dev.compute.agentartificial.com.txt b/envs/gpu/h100/dht1.cillium.dev.compute.agentartificial.com.txt
new file mode 100644
index 000000000..be8f62210
--- /dev/null
+++ b/envs/gpu/h100/dht1.cillium.dev.compute.agentartificial.com.txt
@@ -0,0 +1,7 @@
+INITIAL_PEERS=/dns/dht1.cillium.dev.compute.agentartificial.com/tcp/8008/p2p/Qmb3skfrki1PR8ww6nxvoGm51F5imK3e1DPMZgtay6ofE2
+DEVICE=cuda
+MODEL=VAGOsolutions/SauerkrautLM-Mixtral-8x7B-Instruct
+BLOCKS=100
+#MODEL=Maykeye/TinyLLama-v0
+#MODEL=SanjiWatsuki/TinyMixtral-32x248M
+PORT=31332
diff --git a/envs/gpu/h100/peers.txt b/envs/gpu/h100/peers.txt
new file mode 100644
index 000000000..fa750b647
--- /dev/null
+++ b/envs/gpu/h100/peers.txt
@@ -0,0 +1 @@
+INITIAL_PEERS=/dns/dht1.cillium.dev.compute.agentartificial.com/tcp/8008/p2p/Qmb3skfrki1PR8ww6nxvoGm51F5imK3e1DPMZgtay6ofE2
diff --git a/envs/gpu/is1/env.txt b/envs/gpu/is1/env.txt
new file mode 100644
index 000000000..5cfce73ed
--- /dev/null
+++ b/envs/gpu/is1/env.txt
@@ -0,0 +1,5 @@
+INITIAL_PEERS=/ip4/172.17.0.1/tcp/8099/p2p/QmfVvYv3w3EqpKGYG5FCcER9bFgoGLCUvXDUJsZAgSDw3N
+#PJRT_DEVICE=TPU
+DEVICE=cuda
+#DEVICE=cpu
+#DEVICE=tpux
diff --git a/envs/tpu/v3-0/env.txt b/envs/tpu/v3-0/env.txt
new file mode 100644
index 000000000..c0c3c0768
--- /dev/null
+++ b/envs/tpu/v3-0/env.txt
@@ -0,0 +1,4 @@
+INITIAL_PEERS=/ip4/10.164.0.22/tcp/8099/p2p/QmRVmvteSpVKKeNDSaV7Ezy3HNA4bnNfE2EbzDJVFDEwAa
+#PJRT_DEVICE=TPU
+#DEVICE=xla
+DEVICE=cpu
\ No newline at end of file
diff --git a/etc/petals-inference.service b/etc/petals-inference.service
new file mode 100644
index 000000000..ae08b6f40
--- /dev/null
+++ b/etc/petals-inference.service
@@ -0,0 +1,11 @@
+[Unit]
+Description=Petals Inference
+
+[Service]
+User=petals
+Group=petals
+Environment=PJRT_DEVICE=TPU
+ExecStart=/home/petals/venv/bin/python -m petals.cli.run_server --port 31330 petals-team/StableBeluga2 --device xla --num_blocks=4
+
+[Install]
+WantedBy=multi-user.target
diff --git a/get_peersl.sh b/get_peersl.sh
new file mode 100644
index 000000000..b14ee899a
--- /dev/null
+++ b/get_peersl.sh
@@ -0,0 +1 @@
+docker logs petals-backbone-1 2>&1 | grep initial_peers | cut "-d " -f18- | sort -u > peers.txt
diff --git a/health.env b/health.env
new file mode 100644
index 000000000..20e15a404
--- /dev/null
+++ b/health.env
@@ -0,0 +1,3 @@
+INITIAL_PEERS=/ip4/10.164.0.22/tcp/8099/p2p/QmRVmvteSpVKKeNDSaV7Ezy3HNA4bnNfE2EbzDJVFDEwAa
+PJRT_DEVICE=TPU
+DEVICE=xla
\ No newline at end of file
diff --git a/pip.freeze b/pip.freeze
new file mode 100644
index 000000000..50199037f
--- /dev/null
+++ b/pip.freeze
@@ -0,0 +1,77 @@
+accelerate==0.29.2
+async-timeout==4.0.3
+base58==2.1.1
+bitsandbytes==0.41.1
+certifi==2024.2.2
+cffi==1.16.0
+charset-normalizer==3.3.2
+ConfigArgParse==1.7
+cpufeature==0.2.1
+cryptography==42.0.5
+
+Dijkstar==2.6.0
+filelock==3.13.4
+fsspec==2024.3.1
+grpcio==1.62.1
+grpcio-tools==1.62.1
+humanfriendly==10.0
+idna==3.7
+Jinja2==3.1.3
+jsonpointer==2.1
+
+MarkupSafe==2.1.5
+mpmath==1.3.0
+msgpack==1.0.8
+multiaddr==0.0.9
+netaddr==1.2.1
+networkx==3.3
+numpy==1.26.4
+nvidia-cublas-cu12==12.1.3.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cudnn-cu12==8.9.2.26
+nvidia-cufft-cu12==11.0.2.54
+nvidia-curand-cu12==10.3.2.106
+nvidia-cusolver-cu12==11.4.5.107
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-nccl-cu12==2.19.3
+nvidia-nvjitlink-cu12==12.4.127
+nvidia-nvtx-cu12==12.1.105
+
+packaging==24.0
+peft==0.5.0
+prefetch-generator==1.0.3
+protobuf==4.25.3
+psutil==5.9.8
+pycparser==2.22
+pydantic==1.10.15
+pymultihash==0.8.2
+PyYAML==6.0.1
+regex==2023.12.25
+requests==2.31.0
+
+safetensors==0.4.3
+scipy==1.13.0
+sentencepiece==0.2.0
+six==1.16.0
+sortedcontainers==2.4.0
+speedtest-cli==2.1.3
+sympy==1.12
+tensor-parallel==1.0.23
+
+tqdm==4.66.2
+triton==2.2.0
+typing_extensions==4.11.0
+urllib3==2.2.1
+uvloop==0.19.0
+varint==1.0.2
+triton==2.2.0
+typing_extensions==4.11.0
+uvloop==0.19.0
+varint==1.0.2
+
+
+hivemind==1.1.10.post2
+peft==0.5.0
+torch==2.2.2
diff --git a/pip2.freeze b/pip2.freeze
new file mode 100644
index 000000000..f473bed0a
--- /dev/null
+++ b/pip2.freeze
@@ -0,0 +1,12 @@
+
+#transformers==4.38.2
+transformers==4.34.0
+
+
+#tokenizers==0.15.2
+tokenizers>=0.14,<0.15
+
+huggingface_hub>=0.16.4,<0.18
+#huggingface-hub==0.22.2
+
+
diff --git a/pyproject.toml b/pyproject.toml
index cfc991c07..6f1475b01 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
[tool.black]
line-length = 120
-required-version = "22.3.0"
+required-version = "24.3.0"
[tool.isort]
profile = "black"
diff --git a/run.sh b/run.sh
new file mode 100755
index 000000000..15bff4390
--- /dev/null
+++ b/run.sh
@@ -0,0 +1 @@
+PJRT_DEVICE=TPU python -m petals.cli.run_server --port 31330 petals-team/StableBeluga2 --device xla --num_blocks=10
diff --git a/run2.sh b/run2.sh
new file mode 100644
index 000000000..23c984d57
--- /dev/null
+++ b/run2.sh
@@ -0,0 +1 @@
+docker run -d --net host --ipc host --volume petals-cache-backbone:/cache --name backbone --rm learningathome/petals:main python -m petals.cli.run_dht --host_maddrs /ip4/0.0.0.0/tcp/8099 --identity_path bootstrap1.id
diff --git a/setup.sh b/setup.sh
new file mode 100755
index 000000000..b9b40da3c
--- /dev/null
+++ b/setup.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+set -e
+
+docker compose --profile miner --env-file envs/dht1.cillium.dev.compute.agentartificial.com.txt up -d
+#sudo docker compose --profile miner --env-file envs/gpu/h100/dht1.cillium.dev.compute.agentartificial.com.txt up -d
diff --git a/src/petals/__init__.py b/src/petals/__init__.py
index ccc560e0c..68120bfc4 100644
--- a/src/petals/__init__.py
+++ b/src/petals/__init__.py
@@ -20,10 +20,10 @@
__version__ = "2.3.0.dev2"
-if not os.getenv("PETALS_IGNORE_DEPENDENCY_VERSION"):
- assert (
- version.parse("4.38.2") <= version.parse(transformers.__version__) < version.parse("4.39.0")
- ), "Please install a proper transformers version: pip install transformers>=4.37.1,<4.39.0"
+# if not os.getenv("PETALS_IGNORE_DEPENDENCY_VERSION"):
+# assert (
+# version.parse("4.38.2") <= version.parse(transformers.__version__) < version.parse("4.39.0")
+# ), "Please install a proper transformers version: pip install transformers>=4.37.1,<4.39.0"
def _override_bfloat16_mode_default():
diff --git a/src/petals/client/ptune.py b/src/petals/client/ptune.py
index f3995d68a..ecd3543e8 100644
--- a/src/petals/client/ptune.py
+++ b/src/petals/client/ptune.py
@@ -51,7 +51,7 @@ def get_prompt(self, batch_size):
batch_size,
self.pre_seq_len,
self.config.num_hidden_layers,
- self.config.hidden_size
+ self.config.hidden_size,
# TODO: should be num_hidden_layers - 1
)
intermediate_prompts = intermediate_prompts.permute([2, 0, 1, 3])
diff --git a/src/petals/client/remote_forward_backward.py b/src/petals/client/remote_forward_backward.py
index 44abe2686..d61106702 100644
--- a/src/petals/client/remote_forward_backward.py
+++ b/src/petals/client/remote_forward_backward.py
@@ -1,6 +1,7 @@
"""
Utility functions that call RPC forward or backward on a single remote server
"""
+
import asyncio
from typing import Iterable, List, Optional, Sequence, Tuple
diff --git a/src/petals/client/routing/spending_policy.py b/src/petals/client/routing/spending_policy.py
index 0af3db7fd..f4eddba76 100644
--- a/src/petals/client/routing/spending_policy.py
+++ b/src/petals/client/routing/spending_policy.py
@@ -3,6 +3,7 @@
The intent is to let Petals participants earn points by helping others while idle (e.g. at night), then use these
points to run their own compute experiments faster. See Section 4 of https://arxiv.org/abs/2209.01188 for discussion.
"""
+
from abc import ABC, abstractmethod
diff --git a/src/petals/client/sequential_autograd.py b/src/petals/client/sequential_autograd.py
index 9d965d2a5..c95f0a701 100644
--- a/src/petals/client/sequential_autograd.py
+++ b/src/petals/client/sequential_autograd.py
@@ -1,6 +1,7 @@
"""
A PyTorch autograd function that runs forward/backward on a sequence of remote servers in a fault-tolerant manner
"""
+
import asyncio
import itertools
from collections import deque
diff --git a/src/petals/constants.py b/src/petals/constants.py
index d307b8140..3425ea6b8 100644
--- a/src/petals/constants.py
+++ b/src/petals/constants.py
@@ -2,17 +2,10 @@
PUBLIC_INITIAL_PEERS = [
# IPv4 DNS addresses
- "/dns/bootstrap1.petals.dev/tcp/31337/p2p/QmedTaZXmULqwspJXz44SsPZyTNKxhnnFvYRajfH7MGhCY",
- "/dns/bootstrap2.petals.dev/tcp/31338/p2p/QmQGTqmM7NKjV6ggU1ZCap8zWiyKR89RViDXiqehSiCpY5",
- # IPv6 DNS addresses
- "/dns6/bootstrap1.petals.dev/tcp/31337/p2p/QmedTaZXmULqwspJXz44SsPZyTNKxhnnFvYRajfH7MGhCY",
- "/dns6/bootstrap2.petals.dev/tcp/31338/p2p/QmQGTqmM7NKjV6ggU1ZCap8zWiyKR89RViDXiqehSiCpY5",
- # Reserved IPs
- "/ip4/159.89.214.152/tcp/31337/p2p/QmedTaZXmULqwspJXz44SsPZyTNKxhnnFvYRajfH7MGhCY",
- "/ip4/159.203.156.48/tcp/31338/p2p/QmQGTqmM7NKjV6ggU1ZCap8zWiyKR89RViDXiqehSiCpY5",
+ "/dns/dht1.cillium.dev.compute.agentartificial.com/tcp/8008/p2p/Qmb3skfrki1PR8ww6nxvoGm51F5imK3e1DPMZgtay6ofE2"
]
# The reachability API is currently used only when connecting to the public swarm
-REACHABILITY_API_URL = "https://health.petals.dev"
+REACHABILITY_API_URL = "https://health.cillium.dev.compute.agentartificial.com"
DTYPE_MAP = dict(bfloat16=torch.bfloat16, float16=torch.float16, float32=torch.float32, auto="auto")
diff --git a/src/petals/models/bloom/block.py b/src/petals/models/bloom/block.py
index 439b9ca10..d7431860c 100644
--- a/src/petals/models/bloom/block.py
+++ b/src/petals/models/bloom/block.py
@@ -3,6 +3,7 @@
Based on https://github.com/huggingface/transformers/commit/ca2a55e9dfb245527b5e1c954fec6ffbb7aef07b
See commit history for authorship.
"""
+
from typing import Optional, Tuple
import torch
diff --git a/src/petals/models/falcon/block.py b/src/petals/models/falcon/block.py
index a510abaa1..761bd5dd1 100644
--- a/src/petals/models/falcon/block.py
+++ b/src/petals/models/falcon/block.py
@@ -3,6 +3,7 @@
Based on https://github.com/huggingface/transformers/blob/main/src/transformers/models/falcon/modeling_falcon.py
See commit history for authorship.
"""
+
import math
from functools import partial
from typing import Optional, Tuple
diff --git a/src/petals/models/llama/block.py b/src/petals/models/llama/block.py
index 2eb8f731f..bd6c8c86d 100644
--- a/src/petals/models/llama/block.py
+++ b/src/petals/models/llama/block.py
@@ -3,6 +3,7 @@
Based on https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py
See commit history for authorship.
"""
+
import math
from typing import Optional, Tuple
diff --git a/src/petals/server/block_functions.py b/src/petals/server/block_functions.py
index a79f05c94..4c3cf9a02 100644
--- a/src/petals/server/block_functions.py
+++ b/src/petals/server/block_functions.py
@@ -1,6 +1,7 @@
"""
This module implements server-side computations on served blocks: forward, backward and inference; used by handler
"""
+
from __future__ import annotations
from typing import Any, AsyncIterator, Dict, Optional, Sequence, Tuple, Union
diff --git a/src/petals/server/from_pretrained.py b/src/petals/server/from_pretrained.py
index 4a3b15077..bad0b2ef0 100644
--- a/src/petals/server/from_pretrained.py
+++ b/src/petals/server/from_pretrained.py
@@ -6,6 +6,7 @@
- fetch the weights over IPoAC, using a fleet of trained pigeons ( http://www.faqs.org/rfcs/rfc1149.html )
"""
+
import json
import time
from contextlib import suppress
diff --git a/src/petals/server/memory_cache.py b/src/petals/server/memory_cache.py
index fa4db218f..9e7abf7b2 100644
--- a/src/petals/server/memory_cache.py
+++ b/src/petals/server/memory_cache.py
@@ -4,6 +4,7 @@
For now, the only purpose of this code is to ensure that allocated memory will be deleted properly.
"""
+
import asyncio
import contextlib
import ctypes
diff --git a/src/petals/server/throughput.py b/src/petals/server/throughput.py
index d9471790f..c30d2877a 100644
--- a/src/petals/server/throughput.py
+++ b/src/petals/server/throughput.py
@@ -206,7 +206,7 @@ def measure_compute_rps(
block = block.to(dtype)
block = convert_block(block, 0, config, tensor_parallel_devices, device, quant_type=quant_type, freeze=True)
- cache = (DUMMY_KEY_PAST.to(dtype), DUMMY_KEY_PAST.to(dtype))
+ cache = (DUMMY_KEY_PAST.to(dtype=dtype, device=device), DUMMY_KEY_PAST.to(dtype=dtype, device=device))
elapsed = 0
dummy_input = torch.randn(1, n_tokens, config.hidden_size, device=device, dtype=dtype)
diff --git a/src/petals/utils/auto_config.py b/src/petals/utils/auto_config.py
index 0cec83d87..6043c7ba2 100644
--- a/src/petals/utils/auto_config.py
+++ b/src/petals/utils/auto_config.py
@@ -40,6 +40,9 @@ def from_pretrained(cls, model_name_or_path: Union[str, os.PathLike, None], *arg
):
kwargs["use_auth_token"] = True
+ kwargs["trust_remote_code"] = True
+ # trust_remote_code=True
+
config = AutoConfig.from_pretrained(model_name_or_path, *args, **kwargs)
if config.model_type not in _CLASS_MAPPING:
raise ValueError(f"Petals does not support model type {config.model_type}")
diff --git a/src/petals/utils/convert_block.py b/src/petals/utils/convert_block.py
index 94d3e29f3..26d6b7dd3 100644
--- a/src/petals/utils/convert_block.py
+++ b/src/petals/utils/convert_block.py
@@ -1,6 +1,7 @@
"""
Tools for converting transformer blocks, applying quantization and/or tensor parallelism
"""
+
import re
from enum import Enum
from typing import Optional, Sequence
diff --git a/src/petals/utils/dht.py b/src/petals/utils/dht.py
index 4faf74aa7..357cd98cc 100644
--- a/src/petals/utils/dht.py
+++ b/src/petals/utils/dht.py
@@ -1,6 +1,7 @@
"""
Utilities for declaring and retrieving active model layers using a shared DHT.
"""
+
from __future__ import annotations
import math
diff --git a/up.sh b/up.sh
new file mode 100644
index 000000000..8ed68d13e
--- /dev/null
+++ b/up.sh
@@ -0,0 +1 @@
+sudo docker compose --profile core --env-file ./envs/gpu/h100.txt up health