Skip to content

Commit

Permalink
fix upscale and add some compile options
Browse files Browse the repository at this point in the history
  • Loading branch information
vladmandic committed Nov 17, 2023
1 parent 7ef41af commit d46dddd
Show file tree
Hide file tree
Showing 6 changed files with 27 additions and 25 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,14 @@
- Support `--ckpt none` to skip loading a model
- **XYZ grid**
- Add refiner options to XYZ Grid
- Add option to create only subimages in XYZ grid, thanks @midcoastal
- Add option to create only subgrids in XYZ grid, thanks @midcoastal
- Allow custom font, background and text color in settings
- **Fixes**
- Fix `params.txt` saved before actual image
- Fix inpaint
- Fix manual grid image save
- Fix img2img init image save
- Fix upscale in txt2img for batch counts when no hires is used
- More uniform models paths
- Safe scripts callback execution
- Improved extension compatibility
Expand Down
24 changes: 12 additions & 12 deletions installer.py
Original file line number Diff line number Diff line change
Expand Up @@ -775,26 +775,26 @@ def install_requirements():
# set environment variables controlling the behavior of various libraries
def set_environment():
    """Apply default tuning values to the process environment.

    Every entry is written with ``os.environ.setdefault``, so a variable that
    is already present in the environment keeps its existing value. On macOS
    (``sys.platform == 'darwin'``) ``PYTORCH_ENABLE_MPS_FALLBACK`` is
    defaulted as well.
    """
    log.debug('Setting environment tuning')
    # Each name appears once: a repeated setdefault for the same name is a
    # no-op, so listing a variable a second time would change nothing.
    tuning_defaults = (
        ('ACCELERATE', 'True'),
        ('ATTN_PRECISION', 'fp16'),
        ('CUDA_AUTO_BOOST', '1'),
        ('CUDA_CACHE_DISABLE', '0'),
        ('CUDA_DEVICE_DEFAULT_PERSISTING_L2_CACHE_PERCENTAGE_LIMIT', '0'),
        ('CUDA_LAUNCH_BLOCKING', '0'),
        ('CUDA_MODULE_LOADING', 'LAZY'),
        ('FORCE_CUDA', '1'),
        ('GRADIO_ANALYTICS_ENABLED', 'False'),
        ('HF_HUB_DISABLE_EXPERIMENTAL_WARNING', '1'),
        ('HF_HUB_DISABLE_TELEMETRY', '1'),
        ('K_DIFFUSION_USE_COMPILE', '0'),
        ('NUMEXPR_MAX_THREADS', '16'),
        ('PYTHONHTTPSVERIFY', '0'),
        ('PYTORCH_CUDA_ALLOC_CONF', 'garbage_collection_threshold:0.8,max_split_size_mb:512'),
        ('SAFETENSORS_FAST_GPU', '1'),
        ('TF_CPP_MIN_LOG_LEVEL', '2'),
        ('TF_ENABLE_ONEDNN_OPTS', '0'),
        ('USE_TORCH', '1'),
        ('UVICORN_TIMEOUT_KEEP_ALIVE', '60'),
    )
    for env_name, env_value in tuning_defaults:
        os.environ.setdefault(env_name, env_value)
    if sys.platform == 'darwin':
        os.environ.setdefault('PYTORCH_ENABLE_MPS_FALLBACK', '1')

Expand Down
18 changes: 8 additions & 10 deletions modules/processing_diffusers.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,14 @@ def hires_resize(latents): # input=latents output=pil
if latent_upscaler is not None:
latents = torch.nn.functional.interpolate(latents, size=(p.hr_upscale_to_y // 8, p.hr_upscale_to_x // 8), mode=latent_upscaler["mode"], antialias=latent_upscaler["antialias"])
first_pass_images = vae_decode(latents=latents, model=shared.sd_model, full_quality=p.full_quality, output_type='pil')
p.init_images = []
resized_images = []
for img in first_pass_images:
if latent_upscaler is None:
init_image = images.resize_image(1, img, p.hr_upscale_to_x, p.hr_upscale_to_y, upscaler_name=p.hr_upscaler)
resized_image = images.resize_image(1, img, p.hr_upscale_to_x, p.hr_upscale_to_y, upscaler_name=p.hr_upscaler)
else:
init_image = img
# if is_refiner_enabled:
# init_image = vae_encode(init_image, model=shared.sd_model, full_quality=p.full_quality)
p.init_images.append(init_image)
return p.init_images
resized_image = img
resized_images.append(resized_image)
return resized_images

def save_intermediate(latents, suffix):
for i in range(len(latents)):
Expand Down Expand Up @@ -489,7 +487,7 @@ def calculate_refiner_steps():
return results

# optional hires pass
if p.enable_hr and p.hr_upscaler != 'None' and p.denoising_strength > 0 and len(getattr(p, 'init_images', [])) == 0:
if p.enable_hr and getattr(p, 'hr_upscaler', 'None') != 'None' and len(getattr(p, 'init_images', [])) == 0:
p.is_hr_pass = True
latent_scale_mode = shared.latent_upscale_modes.get(p.hr_upscaler, None) if (hasattr(p, "hr_upscaler") and p.hr_upscaler is not None) else shared.latent_upscale_modes.get(shared.latent_upscale_default_mode, "None")
if p.is_hr_pass:
Expand All @@ -501,7 +499,7 @@ def calculate_refiner_steps():
save_intermediate(latents=output.images, suffix="-before-hires")
shared.state.job = 'upscale'
output.images = hires_resize(latents=output.images)
if latent_scale_mode is not None or p.hr_force:
if (latent_scale_mode is not None or p.hr_force) and p.denoising_strength > 0:
p.ops.append('hires')
shared.sd_model = sd_models.set_diffuser_pipe(shared.sd_model, sd_models.DiffusersTaskType.IMAGE_2_IMAGE)
recompile_model(hires=True)
Expand All @@ -518,7 +516,7 @@ def calculate_refiner_steps():
guidance_rescale=p.diffusers_guidance_rescale,
output_type='latent' if hasattr(shared.sd_model, 'vae') else 'np',
clip_skip=p.clip_skip,
image=p.init_images,
image=output.images,
strength=p.denoising_strength,
desc='Hires',
)
Expand Down
3 changes: 3 additions & 0 deletions modules/sd_models_compile.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ def compile_stablefast(sd_model):
warnings.filterwarnings("ignore", category=torch.jit.TracerWarning)
config.enable_cuda_graph = shared.opts.cuda_compile_fullgraph
config.enable_jit_freeze = shared.opts.diffusers_eval
config.memory_format = torch.channels_last if shared.opts.opt_channelslast else torch.contiguous_format
# config.enable_cnn_optimization
# config.prefer_lowp_gemm
try:
t0 = time.time()
sd_model = sf.compile(sd_model, config)
Expand Down
2 changes: 1 addition & 1 deletion modules/shared.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ def default(obj):
"cuda_compile_vae": OptionInfo(True if cmd_opts.use_openvino else False, "Compile VAE"),
"cuda_compile_upscaler": OptionInfo(True if cmd_opts.use_openvino else False, "Compile upscaler"),
"cuda_compile_backend": OptionInfo("openvino_fx" if cmd_opts.use_openvino else "none", "Model compile backend", gr.Radio, {"choices": ['none', 'inductor', 'cudagraphs', 'aot_ts_nvfuser', 'hidet', 'ipex', 'openvino_fx', 'stable-fast']}),
"cuda_compile_mode": OptionInfo("default", "Model compile mode", gr.Radio, {"choices": ['default', 'reduce-overhead', 'max-autotune']}),
"cuda_compile_mode": OptionInfo("default", "Model compile mode", gr.Radio, {"choices": ['default', 'reduce-overhead', 'max-autotune', 'max-autotune-no-cudagraphs']}),
"cuda_compile_fullgraph": OptionInfo(False, "Model compile fullgraph"),
"cuda_compile_precompile": OptionInfo(False if cmd_opts.use_openvino else True, "Model compile precompile"),
"cuda_compile_verbose": OptionInfo(False, "Model compile verbose mode"),
Expand Down
2 changes: 1 addition & 1 deletion modules/upscaler.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ def compile_upscaler(model, name=""):

if modules.shared.opts.cuda_compile_backend == "openvino_fx":
from modules.intel.openvino import openvino_fx # pylint: disable=unused-import
from modules.sd_models_compile import CompiledModelState
from modules.sd_models_compile import CompiledModelState # pylint: disable=unused-import
torch._dynamo.eval_frame.check_if_dynamo_supported = lambda: True # pylint: disable=protected-access

log_level = logging.WARNING if modules.shared.opts.cuda_compile_verbose else logging.CRITICAL # pylint: disable=protected-access
Expand Down

0 comments on commit d46dddd

Please sign in to comment.