refactor: cleanup

pykeio · Mar 11, 2023 · 4e558b9 · 4e558b9
1 parent bc24b29
commit 4e558b9
Show file tree

Hide file tree

Showing 10 changed files with 129 additions and 53 deletions.
diff --git a/.gitignore b/.gitignore
@@ -7,3 +7,4 @@ __pycache__
 **/tokenizer.json
 **/diffusers.json
 Cargo.lock
+models/
diff --git a/Cargo.toml b/Cargo.toml
@@ -41,6 +41,7 @@ tokenizers = { version = "0.13", optional = true, default-features = false, feat
 [dev-dependencies]
 tokio = { version = "1.0", features = [ "full" ] }
 image = { version = "0.24", default-features = false, features = [ "png" ] }
+ort = { version = "1.14", default-features = false, features = [ "download-binaries" ] }
 
 requestty = "0.5"
 kdam = "0.3"

diff --git a/docs/src/importing_models.md b/docs/src/importing_models.md
@@ -16,6 +16,37 @@ $ python3 -m pip install torch --extra-index-url https://download.pytorch.org/wh
 $ python3 -m pip install -r requirements.txt
 ```
 
+## Ready-to-use models
+pyke provides some ready-to-use models. They are available for download at the links below. You can download them with a simple `git clone`.
+
+- [Stable Diffusion v1.5](https://huggingface.co/pykeio/stable-diffusion-v1-5)
+- [AbyssOrangeMix2](https://huggingface.co/pykeio/abyss-v2)
+- [Anything V4](https://huggingface.co/pykeio/anything-v4)
+
+## Converting from a .ckpt or .safetensors
+The `sd2pyke.py` script will convert an original Stable Diffusion checkpoint to a pyke Diffusers model. The script can read a traditional `*.ckpt` file or a new `*.safetensors` checkpoint.
+
+Due to memory constraints, the model will first be converted to a Hugging Face model, then converted to a pyke Diffusers model. This process should take only a few minutes.
+
+```sh
+$ python3 scripts/sd2pyke.py ~/stable-diffusion.safetensors ~/diffusers-models/stable-diffusion-v1-5 -C v1-inference.yaml
+```
+
+Note the `-C v1-inference.yaml`: you must provide the path to a Stable Diffusion config file. In 99.9% of cases, this is `v1-inference.yaml`, which already comes included in the repository. If your model uses a different architecture (e.g. if it's based on Stable Diffusion v2), make sure to download & pass the path to the proper config file.
+
+You can also convert the model directly to float16 format. Float16 models use less disk space and RAM, and run faster on modern GPUs, with little quality loss. Float16 is recommended for GPU inference, especially on systems with low VRAM (< 10 GB).
+```sh
+$ python3 scripts/sd2pyke.py --fp16 ~/stable-diffusion.safetensors ~/diffusers-models/stable-diffusion-v1-5-float16 -C v1-inference.yaml
+```
+
+### More options
+`sd2pyke.py` has a few more options for performance or compatibility. Below are some commonly used ones. See `python3 scripts/sd2pyke.py --help` for the full list of options.
+
+- `--skip-safety-checker`: Skips converting the safety checker. Enable this when converting models without a safety checker, like Stable Diffusion v2-based models.
+- `--override-unet-sample-size`: Override the sample size passed to the UNet. Set to `64` when converting Stable Diffusion v2 models on low-VRAM devices to avoid an OOM crash.
+- `--prediction-type`: Set this to `"v-prediction"` for Stable Diffusion v2 based models.
+- `--ema`: Extract the EMA weights. By default, non-EMA weights will be extracted. EMA weights may produce better quality images.
+
 ## Converting from a Hugging Face Diffusers model
 > Some Hugging Face models require you to log in to download them. To do this, you'll need to create or copy a token [from the Hugging Face settings page](https://hf.co/settings/tokens). The `READ` permission is sufficient. Then, run `huggingface-cli login` and enter your token to log in. Adding a Git credential is not required.
 
@@ -31,18 +62,14 @@ If you have a Hugging Face model saved to disk, you can also provide the path to
 $ python3 scripts/hf2pyke.py /mnt/storage/stable-diffusion-v1-5 ~/diffusers-models/stable-diffusion-v1-5
 ```
 
-You can also convert the model directly to float16 format. Float16 models use less disk space, RAM, and run faster on modern GPUs, with little quality loss. Float16 is recommended for GPU inference, especially on systems with low VRAM (< 10 GB).
+Similar to `sd2pyke.py`, you can convert the model directly to float16 format.
 ```sh
-$ python3 scripts/hf2pyke --fp16 runwayml/stable-diffusion-v1-5 ~/diffusers-models/stable-diffusion-v1-5-float16
+$ python3 scripts/hf2pyke.py --fp16 runwayml/stable-diffusion-v1-5 ~/diffusers-models/stable-diffusion-v1-5-float16
 ```
 
 ### More options
 `hf2pyke.py` has a few more options for performance or compatibility. Below are some commonly used ones. See `python3 scripts/hf2pyke.py --help` for the full list of options.
 
 - `--skip-safety-checker`: Skips converting the safety checker. Enable this when converting models without a safety checker, like Stable Diffusion v2-based models.
-- `--simplify-small-models`: Runs ONNX simplifier on all models but the UNet for a speed boost.
-- `--overrude-unet-sample-size`: Override the sample size passed to the UNet. Set to `64` when converting Stable Diffusion v2 models on low-VRAM devices to avoid an OOM crash.
+- `--override-unet-sample-size`: Override the sample size passed to the UNet. Set to `64` when converting Stable Diffusion v2 models on low-VRAM devices to avoid an OOM crash.
 - `--no-accelerate`: Disables using `accelerate`. Enable this on Apple silicon devices, or if you get errors involving `device_map`.
-
-## Converting from a .ckpt or .safetensors
-soon™️
diff --git a/examples/stable-diffusion-interactive.rs b/examples/stable-diffusion-interactive.rs
@@ -1,4 +1,4 @@
-use std::{cell::RefCell, env, sync::Arc};
+use std::{cell::RefCell, env};
 
 use kdam::{tqdm, BarExt};
 use pyke_diffusers::{

diff --git a/examples/stable-diffusion-memory-efficient.rs b/examples/stable-diffusion-memory-efficient.rs
@@ -1,5 +1,3 @@
-use std::sync::Arc;
-
 use pyke_diffusers::{
 	EulerDiscreteScheduler, OrtEnvironment, SchedulerOptimizedDefaults, StableDiffusionMemoryOptimizedPipeline, StableDiffusionOptions,
 	StableDiffusionTxt2ImgOptions

diff --git a/examples/stable-diffusion.rs b/examples/stable-diffusion.rs
@@ -1,5 +1,3 @@
-use std::sync::Arc;
-
 use pyke_diffusers::{
 	ArenaExtendStrategy, CUDADeviceOptions, DiffusionDevice, DiffusionDeviceControl, EulerDiscreteScheduler, OrtEnvironment, SchedulerOptimizedDefaults,
 	StableDiffusionOptions, StableDiffusionPipeline, StableDiffusionTxt2ImgOptions

diff --git a/src/lib.rs b/src/lib.rs
@@ -6,20 +6,23 @@
 //! ONNX Runtime provides optimized inference for both CPUs and GPUs, including both NVIDIA & AMD GPUs via DirectML.
 //!
 //! `pyke-diffusers` is focused on ease of use, with an API closely modeled after Hugging Face diffusers:
-//! ```ignore
+//! ```no_run
+//! # fn main() -> anyhow::Result<()> {
 //! use std::sync::Arc;
 //!
 //! use pyke_diffusers::{
-//! 	EulerDiscreteScheduler, OrtEnvironment, SchedulerOptimizedDefaults, StableDiffusionOptions, StableDiffusionPipeline,
-//! 	StableDiffusionTxt2ImgOptions
+//! 	EulerDiscreteScheduler, OrtEnvironment, SchedulerOptimizedDefaults, StableDiffusionOptions,
+//! 	StableDiffusionPipeline, StableDiffusionTxt2ImgOptions
 //! };
 //!
-//! let environment = Arc::new(OrtEnvironment::builder().build()?);
+//! let environment = OrtEnvironment::default().into_arc();
 //! let mut scheduler = EulerDiscreteScheduler::stable_diffusion_v1_optimized_default()?;
 //! let pipeline =
 //! 	StableDiffusionPipeline::new(&environment, "./stable-diffusion-v1-5/", StableDiffusionOptions::default())?;
 //!
 //! let imgs = pipeline.txt2img("photo of a red fox", &mut scheduler, StableDiffusionTxt2ImgOptions::default())?;
+//! # Ok(())
+//! # }
 //! ```
 //!
 //! See [`StableDiffusionPipeline`] for more info on the Stable Diffusion pipeline.
@@ -104,12 +107,13 @@ impl From<CuDNNConvolutionAlgorithmSearch> for String {
 /// Device options for the CUDA execution provider.
 ///
 /// For low-VRAM devices running Stable Diffusion v1, it's best to use a float16 model with the following parameters:
-/// ```ignore
-/// CUDADeviceOptions {
+/// ```
+/// # use pyke_diffusers::{ArenaExtendStrategy, CUDADeviceOptions};
+/// let options = CUDADeviceOptions {
 /// 	memory_limit: Some(3000000000),
 /// 	arena_extend_strategy: Some(ArenaExtendStrategy::SameAsRequested),
 /// 	..Default::default()
-/// }
+/// };
 /// ```
 #[derive(Default, Debug, Clone, PartialEq, Eq)]
 pub struct CUDADeviceOptions {
@@ -199,11 +203,12 @@ impl From<DiffusionDevice> for ExecutionProvider {
 ///
 /// For Stable Diffusion on GPUs with <6 GB VRAM, it may be favorable to place the text encoder, VAE decoder, and
 /// safety checker on the CPU so the much more intensive UNet can be placed on the GPU:
-/// ```ignore
-/// DiffusionDeviceControl {
+/// ```
+/// # use pyke_diffusers::{DiffusionDevice, DiffusionDeviceControl};
+/// let devices = DiffusionDeviceControl {
 /// 	unet: DiffusionDevice::CUDA(0, None),
 /// 	..Default::default()
-/// }
+/// };
 /// ```
 #[derive(Debug, Clone)]
 pub struct DiffusionDeviceControl {
@@ -222,7 +227,10 @@ pub struct DiffusionDeviceControl {
 impl DiffusionDeviceControl {
 	/// Constructs [`DiffusionDeviceControl`] with all models on the same device.
 	///
-	/// ```ignore
+	/// ```no_run
+	/// # fn main() -> anyhow::Result<()> {
+	/// # use pyke_diffusers::{DiffusionDevice, DiffusionDeviceControl, OrtEnvironment, StableDiffusionPipeline, StableDiffusionOptions};
+	/// # let environment = OrtEnvironment::default().into_arc();
 	/// let pipeline = StableDiffusionPipeline::new(
 	/// 	&environment,
 	/// 	"./stable-diffusion-v1-5/",
@@ -231,6 +239,8 @@ impl DiffusionDeviceControl {
 	/// 		..Default::default()
 	/// 	}
 	/// )?;
+	/// # Ok(())
+	/// # }
 	/// ```
 	///
 	/// Note that if you are setting `memory_limit` in [`CUDADeviceOptions`], the memory limit is **per session** (aka

diff --git a/src/pipelines/safe_stable_diffusion.rs b/src/pipelines/safe_stable_diffusion.rs
@@ -11,12 +11,12 @@ use crate::{schedulers::DiffusionScheduler, Prompt};
 /// A [Stable Diffusion](https://github.com/CompVis/stable-diffusion) pipeline designed to guide generation away from
 /// harmful or unsafe imagery.
 ///
-/// ```ignore
+/// ```no_run
 /// use std::sync::Arc;
 ///
 /// use pyke_diffusers::{
-/// 	EulerDiscreteScheduler, OrtEnvironment, SafeStableDiffusionPipeline, SchedulerOptimizedDefaults, StableDiffusionOptions,
-/// 	StableDiffusionTxt2ImgOptions
+/// 	EulerDiscreteScheduler, OrtEnvironment, SafeStableDiffusionPipeline, SchedulerOptimizedDefaults,
+/// 	StableDiffusionOptions, StableDiffusionTxt2ImgOptions
 /// };
 ///
 /// let environment = Arc::new(OrtEnvironment::builder().build()?);
@@ -34,7 +34,7 @@ pub struct SafeStableDiffusionPipeline {
 impl SafeStableDiffusionPipeline {
 	/// Creates a new Stable Diffusion pipeline, loading models from `root`.
 	///
-	/// ```ignore
+	/// ```no_run
 	/// let pipeline =
 	/// 	SafeStableDiffusionPipeline::new(&environment, "./stable-diffusion-v1-5/", &StableDiffusionOptions::default())?;
 	/// ```
@@ -65,9 +65,14 @@ impl SafeStableDiffusionPipeline {
 		self.pipeline.encode_prompt(prompt, do_classifier_free_guidance, negative_prompt)
 	}
 
+	/// Decodes UNet latents via a cheap approximation into an array of [`image::DynamicImage`]s.
+	pub fn approximate_decode_latents(&self, latents: Array4<f32>) -> anyhow::Result<Vec<DynamicImage>> {
+		self.pipeline.approximate_decode_latents(latents)
+	}
+
 	/// Decodes UNet latents via the variational autoencoder into an array of [`image::DynamicImage`]s.
-	pub fn decode_latents(&self, latents: Array4<f32>, options: &StableDiffusionTxt2ImgOptions) -> anyhow::Result<Vec<DynamicImage>> {
-		self.pipeline.decode_latents(latents, options)
+	pub fn decode_latents(&self, latents: Array4<f32>) -> anyhow::Result<Vec<DynamicImage>> {
+		self.pipeline.decode_latents(latents)
 	}
 
 	/// Generates images from given text prompt(s). Returns a vector of [`image::DynamicImage`]s, using float32 buffers.

diff --git a/src/pipelines/stable_diffusion/impl_main.rs b/src/pipelines/stable_diffusion/impl_main.rs
@@ -21,20 +21,21 @@ use crate::{
 
 /// A [Stable Diffusion](https://github.com/CompVis/stable-diffusion) pipeline.
 ///
-/// ```ignore
-/// use std::sync::Arc;
-///
+/// ```no_run
+/// # fn main() -> anyhow::Result<()> {
 /// use pyke_diffusers::{
-/// 	EulerDiscreteScheduler, OrtEnvironment, SchedulerOptimizedDefaults, StableDiffusionOptions, StableDiffusionPipeline,
-/// 	StableDiffusionTxt2ImgOptions
+/// 	EulerDiscreteScheduler, OrtEnvironment, SchedulerOptimizedDefaults, StableDiffusionOptions,
+/// 	StableDiffusionPipeline, StableDiffusionTxt2ImgOptions
 /// };
 ///
-/// let environment = Arc::new(OrtEnvironment::builder().build()?);
+/// let environment = OrtEnvironment::default().into_arc();
 /// let mut scheduler = EulerDiscreteScheduler::stable_diffusion_v1_optimized_default()?;
 /// let pipeline =
 /// 	StableDiffusionPipeline::new(&environment, "./stable-diffusion-v1-5/", StableDiffusionOptions::default())?;
 ///
 /// let imgs = pipeline.txt2img("photo of a red fox", &mut scheduler, StableDiffusionTxt2ImgOptions::default())?;
+/// # Ok(())
+/// # }
 /// ```
 pub struct StableDiffusionPipeline {
 	environment: Arc<Environment>,
@@ -53,9 +54,14 @@ pub struct StableDiffusionPipeline {
 impl StableDiffusionPipeline {
 	/// Creates a new Stable Diffusion pipeline, loading models from `root`.
 	///
-	/// ```ignore
+	/// ```no_run
+	/// # fn main() -> anyhow::Result<()> {
+	/// # use pyke_diffusers::{StableDiffusionPipeline, StableDiffusionOptions, OrtEnvironment};
+	/// # let environment = OrtEnvironment::default().into_arc();
 	/// let pipeline =
 	/// 	StableDiffusionPipeline::new(&environment, "./stable-diffusion-v1-5/", StableDiffusionOptions::default())?;
+	/// # Ok(())
+	/// # }
 	/// ```
 	pub fn new(environment: &Arc<Environment>, root: impl Into<PathBuf>, options: StableDiffusionOptions) -> anyhow::Result<Self> {
 		let root: PathBuf = root.into();
@@ -132,9 +138,15 @@ impl StableDiffusionPipeline {
 	///
 	/// An additional [`StableDiffusionOptions`] parameter can be used to move models to another device.
 	///
-	/// ```ignore
-	/// let mut pipeline = StableDiffusionPipeline::new(&environment, "./stable-diffusion-v1-5/", StableDiffusionOptions::default())?;
+	/// ```no_run
+	/// # fn main() -> anyhow::Result<()> {
+	/// # use pyke_diffusers::{StableDiffusionPipeline, StableDiffusionOptions, OrtEnvironment};
+	/// # let environment = OrtEnvironment::default().into_arc();
+	/// let mut pipeline =
+	/// 	StableDiffusionPipeline::new(&environment, "./stable-diffusion-v1-5/", StableDiffusionOptions::default())?;
 	/// pipeline = pipeline.replace("./waifu-diffusion-v1-3/", None)?;
+	/// # Ok(())
+	/// # }
 	/// ```
 	pub fn replace(mut self, new_root: impl Into<PathBuf>, options: Option<StableDiffusionOptions>) -> anyhow::Result<Self> {
 		let new_root: PathBuf = new_root.into();
@@ -317,12 +329,18 @@ impl StableDiffusionPipeline {
 	/// # Examples
 	///
 	/// Simple text-to-image:
-	/// ```ignore
+	/// ```no_run
+	/// # fn main() -> anyhow::Result<()> {
+	/// # use pyke_diffusers::{StableDiffusionPipeline, EulerDiscreteScheduler, StableDiffusionOptions, StableDiffusionTxt2ImgOptions, OrtEnvironment};
+	/// # let environment = OrtEnvironment::default().into_arc();
+	/// # let mut scheduler = EulerDiscreteScheduler::default();
 	/// let pipeline =
-	/// 	StableDiffusionPipeline::new(&environment, "./stable-diffusion-v1-5/", &StableDiffusionOptions::default())?;
+	/// 	StableDiffusionPipeline::new(&environment, "./stable-diffusion-v1-5/", StableDiffusionOptions::default())?;
 	///
-	/// let imgs = pipeline.txt2img("photo of a red fox", &mut scheduler, &StableDiffusionTxt2ImgOptions::default())?;
+	/// let imgs = pipeline.txt2img("photo of a red fox", &mut scheduler, StableDiffusionTxt2ImgOptions::default())?;
 	/// imgs[0].clone().into_rgb8().save("result.png")?;
+	/// # Ok(())
+	/// # }
 	/// ```
 	pub fn txt2img<S: DiffusionScheduler>(
 		&self,

diff --git a/src/pipelines/stable_diffusion/impl_memory_optimized.rs b/src/pipelines/stable_diffusion/impl_memory_optimized.rs
@@ -28,20 +28,24 @@ use crate::{
 /// be abysmal compared to the standard [`super::StableDiffusionPipeline`], as models will be constantly loaded and
 /// unloaded.
 ///
-/// ```ignore
-/// use std::sync::Arc;
-///
+/// ```no_run
+/// # fn main() -> anyhow::Result<()> {
 /// use pyke_diffusers::{
-/// 	EulerDiscreteScheduler, OrtEnvironment, SchedulerOptimizedDefaults, StableDiffusionOptions,
-/// 	StableDiffusionMemoryOptimizedPipeline, StableDiffusionTxt2ImgOptions
+/// 	EulerDiscreteScheduler, OrtEnvironment, SchedulerOptimizedDefaults, StableDiffusionMemoryOptimizedPipeline,
+/// 	StableDiffusionOptions, StableDiffusionTxt2ImgOptions
 /// };
 ///
-/// let environment = Arc::new(OrtEnvironment::builder().build()?);
+/// let environment = OrtEnvironment::default().into_arc();
 /// let mut scheduler = EulerDiscreteScheduler::stable_diffusion_v1_optimized_default()?;
-/// let pipeline =
-/// 	StableDiffusionMemoryOptimizedPipeline::new(&environment, "./stable-diffusion-v1-5/", StableDiffusionOptions::default())?;
+/// let pipeline = StableDiffusionMemoryOptimizedPipeline::new(
+/// 	&environment,
+/// 	"./stable-diffusion-v1-5/",
+/// 	StableDiffusionOptions::default()
+/// )?;
 ///
 /// let imgs = pipeline.txt2img("photo of a red fox", &mut scheduler, StableDiffusionTxt2ImgOptions::default())?;
+/// # Ok(())
+/// # }
 /// ```
 pub struct StableDiffusionMemoryOptimizedPipeline {
 	environment: Arc<Environment>,
@@ -56,9 +60,17 @@ impl StableDiffusionMemoryOptimizedPipeline {
 	/// Creates a new Stable Diffusion memory-optimized pipeline. This will check that the necessary models exist in
 	/// `root` but will not load them until a routine is run.
 	///
-	/// ```ignore
-	/// let pipeline =
-	/// 	StableDiffusionMemoryOptimizedPipeline::new(&environment, "./stable-diffusion-v1-5/", StableDiffusionOptions::default())?;
+	/// ```no_run
+	/// # fn main() -> anyhow::Result<()> {
+	/// # use pyke_diffusers::{StableDiffusionMemoryOptimizedPipeline, StableDiffusionOptions, OrtEnvironment};
+	/// # let environment = OrtEnvironment::default().into_arc();
+	/// let pipeline = StableDiffusionMemoryOptimizedPipeline::new(
+	/// 	&environment,
+	/// 	"./stable-diffusion-v1-5/",
+	/// 	StableDiffusionOptions::default()
+	/// )?;
+	/// # Ok(())
+	/// # }
 	/// ```
 	pub fn new(environment: &Arc<Environment>, root: impl Into<PathBuf>, options: StableDiffusionOptions) -> anyhow::Result<Self> {
 		let root: PathBuf = root.into();
@@ -197,12 +209,18 @@ impl StableDiffusionMemoryOptimizedPipeline {
 	/// # Examples
 	///
 	/// Simple text-to-image:
-	/// ```ignore
+	/// ```no_run
+	/// # fn main() -> anyhow::Result<()> {
+	/// # use pyke_diffusers::{EulerDiscreteScheduler, StableDiffusionMemoryOptimizedPipeline, StableDiffusionOptions, StableDiffusionTxt2ImgOptions, OrtEnvironment};
+	/// # let environment = OrtEnvironment::default().into_arc();
+	/// # let mut scheduler = EulerDiscreteScheduler::default();
 	/// let pipeline =
 	/// 	StableDiffusionMemoryOptimizedPipeline::new(&environment, "./stable-diffusion-v1-5/", StableDiffusionOptions::default())?;
 	///
 	/// let imgs = pipeline.txt2img("photo of a red fox", &mut scheduler, StableDiffusionTxt2ImgOptions::default())?;
 	/// imgs[0].clone().into_rgb8().save("result.png")?;
+	/// # Ok(())
+	/// # }
 	/// ```
 	pub fn txt2img<S: DiffusionScheduler>(
 		&self,