[Feature] Fix like xtuner #119

Merged · 5 commits · Jan 5, 2024
Changes from 1 commit
Update diffusers and fix test
okotaku committed Jan 5, 2024
commit 5b0a3440f1e81f5d2a18727f649d6b0150651af9
README.md (4 changes: 2 additions & 2 deletions)
@@ -49,12 +49,12 @@ pip install git+https://github.com/okotaku/diffengine.git
 
 DiffEngine makes training easy through its pre-defined configs. These configs provide a streamlined way to start your training process. Here's how you can get started using one of the pre-defined configs:
 
-1. **Choose a config**: You can find various pre-defined configs in the [`configs`](configs/) directory of the DiffEngine repository. For example, if you wish to train a DreamBooth model using the Stable Diffusion algorithm, you can use the [`configs/stable_diffusion_dreambooth/stable_diffusion_v15_dreambooth_lora_dog.py`](configs/stable_diffusion_dreambooth/stable_diffusion_v15_dreambooth_lora_dog.py).
+1. **Choose a config**: You can find various pre-defined configs in the [`configs`](diffengine/configs/) directory of the DiffEngine repository. For example, if you wish to train a DreamBooth model using the Stable Diffusion algorithm, you can use the [`configs/stable_diffusion_dreambooth/stable_diffusion_v15_dreambooth_lora_dog.py`](diffengine/configs/stable_diffusion_dreambooth/stable_diffusion_v15_dreambooth_lora_dog.py).
 
 2. **Start Training**: Open a terminal and run the following command to start training with the selected config:
 
 ```bash
-diffengine train stable_diffusion_v15_dreambooth_lora_dog.py
+diffengine train stable_diffusion_v15_dreambooth_lora_dog
 ```
 
 3. **Monitor Progress and get results**: The training process will begin, and you can track its progress. The outputs of the training will be located in the `work_dirs/stable_diffusion_v15_dreambooth_lora_dog` directory, specifically when using the `stable_diffusion_v15_dreambooth_lora_dog` config.
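The train command drops the `.py` suffix, which fits the xtuner-style packaging this PR is named after: configs move inside the package and are looked up by name rather than by file path. A minimal sketch of such a name-based lookup, assuming an mmengine `Config`; `resolve_config` and its search logic are hypothetical illustrations, not DiffEngine's actual CLI code:

```python
from pathlib import Path

import diffengine
from mmengine.config import Config


def resolve_config(name: str) -> Config:
    """Hypothetical name-based lookup over configs shipped in the package."""
    root = Path(diffengine.__file__).parent / "configs"
    matches = sorted(root.rglob(f"{name}.py"))
    if not matches:
        msg = f"no packaged config named {name!r} under {root}"
        raise FileNotFoundError(msg)
    return Config.fromfile(matches[0])


cfg = resolve_config("stable_diffusion_v15_dreambooth_lora_dog")
```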
diffengine/models/archs/ip_adapter.py (4 changes: 2 additions & 2 deletions)
@@ -8,7 +8,7 @@
     IPAdapterAttnProcessor,
     IPAdapterAttnProcessor2_0,
 )
-from diffusers.models.embeddings import ImageProjection, Resampler
+from diffusers.models.embeddings import ImageProjection, IPAdapterPlusImageProjection
 from diffusers.utils import _get_model_file
 from safetensors import safe_open
 from torch import nn
@@ -181,7 +181,7 @@ def process_ip_adapter_state_dict(  # noqa: PLR0915, C901, PLR0912
         for k, v in image_projection.state_dict().items():
             new_k = k.replace("image_embeds.", "proj.")
             ip_image_projection_state_dict[new_k] = v
-    elif isinstance(image_projection, Resampler):
+    elif isinstance(image_projection, IPAdapterPlusImageProjection):
         for k, v in image_projection.state_dict().items():
             if "2.to" in k:
                 new_k = k.replace("2.to", "0.to")
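Functionally the conversion is unchanged; only the `isinstance` target moves to the diffusers class. For reference, a small sketch of the key-remapping idea from the `ImageProjection` branch above, with toy dimensions that are assumptions rather than values from this repo:

```python
from diffusers.models.embeddings import ImageProjection

# Toy projection module; the dims are chosen only for illustration.
proj = ImageProjection(
    cross_attention_dim=32, image_embed_dim=32, num_image_text_embeds=4)

# Rename the diffusers module keys ("image_embeds.*") to the IP-Adapter
# checkpoint convention ("proj.*"), mirroring the hunk above.
remapped = {k.replace("image_embeds.", "proj."): v
            for k, v in proj.state_dict().items()}
assert "proj.weight" in remapped
```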
diffengine/models/editors/ip_adapter/ip_adapter_xl.py (4 changes: 2 additions & 2 deletions)
@@ -3,7 +3,7 @@
 import numpy as np
 import torch
 from diffusers import DiffusionPipeline
-from diffusers.models.embeddings import ImageProjection, Resampler
+from diffusers.models.embeddings import ImageProjection, IPAdapterPlusImageProjection
 from diffusers.utils import load_image
 from PIL import Image
 from torch import nn
@@ -321,7 +321,7 @@ def prepare_model(self) -> None:
         """
         self.image_encoder = CLIPVisionModelWithProjection.from_pretrained(
             self.image_encoder_name, subfolder=self.image_encoder_sub_folder)
-        self.image_projection = Resampler(
+        self.image_projection = IPAdapterPlusImageProjection(
             embed_dims=self.image_encoder.config.hidden_size,
             output_dims=self.unet.config.cross_attention_dim,
             hidden_dims=1280,
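`IPAdapterPlusImageProjection` is a drop-in for the deleted in-repo `Resampler` and accepts the same constructor keywords. A hedged sketch of a standalone forward pass; the dimensions are illustrative assumptions (SDXL-ish sizes), not values read from this repo:

```python
import torch
from diffusers.models.embeddings import IPAdapterPlusImageProjection

proj = IPAdapterPlusImageProjection(
    embed_dims=1280,    # CLIP vision hidden size (assumed)
    output_dims=2048,   # UNet cross-attention dim (assumed)
    hidden_dims=1280,
    depth=4,
    dim_head=64,
    heads=20,
    num_queries=16)

# A sequence of image hidden states is resampled down to a fixed
# number of query tokens for the IP-Adapter cross-attention.
image_embeds = torch.randn(1, 257, 1280)
tokens = proj(image_embeds)
print(tokens.shape)  # expected: torch.Size([1, 16, 2048])
```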
diffengine/models/editors/ip_adapter/resampler.py (167 changes: 0 additions & 167 deletions)

This file was deleted.

@@ -1,6 +1,4 @@
 from .sdxl_controlnet_data_preprocessor import SDXLControlNetDataPreprocessor
 from .stable_diffusion_xl_controlnet import StableDiffusionXLControlNet
-from .stable_diffusion_xl_controlnetxs import StableDiffusionXLControlNetXS
 
-__all__ = ["SDXLControlNetDataPreprocessor", "StableDiffusionXLControlNet",
-           "StableDiffusionXLControlNetXS"]
+__all__ = ["SDXLControlNetDataPreprocessor", "StableDiffusionXLControlNet"]
@@ -5,10 +5,9 @@
 from mmengine import print_log
 from PIL import Image
 
+from diffengine.models.editors import StableDiffusionXLControlNet
 from diffengine.registry import MODELS
 
-from .stable_diffusion_xl_controlnet import StableDiffusionXLControlNet
-
 
 @MODELS.register_module()
 class StableDiffusionXLControlNetXS(StableDiffusionXLControlNet):
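With the re-export removed from the editors `__init__`, the ControlNet-XS module now pulls its base class through the public `diffengine.models.editors` path instead of a relative sibling import, apparently so it can live outside the core editors package. A hedged sketch of that out-of-tree pattern; `MyControlNetXSVariant` is a hypothetical name, not code from this PR:

```python
from diffengine.models.editors import StableDiffusionXLControlNet
from diffengine.registry import MODELS


@MODELS.register_module()
class MyControlNetXSVariant(StableDiffusionXLControlNet):
    """Hypothetical out-of-tree editor built on the public import path."""
```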
pyproject.toml (2 changes: 1 addition & 1 deletion)
@@ -9,7 +9,7 @@ dependencies = [
     "torch>=2.0.1",
     "torchvision>=0.15.2",
     "datasets>=2.14.6",
-    "diffusers>=0.24.0",
+    "diffusers>=0.25.0",
     "mmengine>=0.10.1",
     "sentencepiece>=0.1.99",
     "tqdm",
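The minimum diffusers version rises in step with the import changes above: `IPAdapterPlusImageProjection` replaces the old `Resampler` name in `diffusers.models.embeddings`, so the pin presumably tracks the release that ships it. A quick, hedged sanity check; the 0.25.0 cutoff is inferred from this commit, not from diffusers release notes:

```python
from importlib.metadata import version

from packaging.version import Version

# Assumption inferred from this commit: the renamed projection class
# needs diffusers >= 0.25.0.
assert Version(version("diffusers")) >= Version("0.25.0")
from diffusers.models.embeddings import IPAdapterPlusImageProjection  # noqa: E402
```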
tests/test_models/test_archs.py (4 changes: 2 additions & 2 deletions)
@@ -2,7 +2,7 @@
 
 import pytest
 from diffusers import UNet2DConditionModel
-from diffusers.models.embeddings import ImageProjection, Resampler
+from diffusers.models.embeddings import ImageProjection, IPAdapterPlusImageProjection
 from peft import LoHaConfig, LoKrConfig, LoraConfig, OFTConfig
 
 from diffengine.models.archs import (
@@ -79,7 +79,7 @@ def test_process_ip_adapter_state_dict():
     assert "proj.weight" in proj_state_dict["image_proj"]
     assert len(proj_state_dict["ip_adapter"]) == 24
 
-    resampler = Resampler(
+    resampler = IPAdapterPlusImageProjection(
         embed_dims=32,
         output_dims=32,
         hidden_dims=128,
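The fixture keeps its small dimensions and only swaps the class name. As a standalone smoke check of the same tiny module, something like this should pass, assuming values for the constructor arguments truncated in this view and the usual resampler output shape of `(batch, num_queries, output_dims)`:

```python
import torch
from diffusers.models.embeddings import IPAdapterPlusImageProjection

# Tiny stand-in mirroring the fixture above; depth/dim_head/heads/num_queries
# are assumed values, since the diff cuts the call off.
resampler = IPAdapterPlusImageProjection(
    embed_dims=32, output_dims=32, hidden_dims=128,
    depth=1, dim_head=4, heads=4, num_queries=4)

out = resampler(torch.randn(2, 7, 32))  # (batch, seq_len, embed_dims)
assert out.shape == (2, 4, 32)
```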