From 1bc67dcc098da4791f3f6108d07ed44e5cebfeee Mon Sep 17 00:00:00 2001
From: Marc-Andre Ferland
Date: Sun, 25 Dec 2022 23:52:21 -0500
Subject: [PATCH 1/2] Thanks to @patrickvonplaten for this fix. [Depth2Img] Correct timestep for additive noise.

---
 scripts/gradio/depth2img.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/gradio/depth2img.py b/scripts/gradio/depth2img.py
index c791a4d0..e4cc2445 100644
--- a/scripts/gradio/depth2img.py
+++ b/scripts/gradio/depth2img.py
@@ -100,7 +100,7 @@ def paint(sampler, image, prompt, t_enc, seed, scale, num_samples=1, callback=No
         if not do_full_sample:
             # encode (scaled latent)
             z_enc = sampler.stochastic_encode(
-                z, torch.tensor([t_enc] * num_samples).to(model.device))
+                z, torch.tensor([t_enc - 1] * num_samples).to(model.device))
         else:
             z_enc = torch.randn_like(z)
         # decode it

From 2bdb5ba42275f7d0d25607f06544ba2fb5b5d825 Mon Sep 17 00:00:00 2001
From: Marc-Andre Ferland
Date: Mon, 26 Dec 2022 22:52:26 -0500
Subject: [PATCH 2/2] Thanks to @patrickvonplaten for this fix. Correct timestep for initial noise addition

---
 scripts/img2img.py             | 2 +-
 scripts/streamlit/depth2img.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/img2img.py b/scripts/img2img.py
index 9085ba9d..2f1387a7 100644
--- a/scripts/img2img.py
+++ b/scripts/img2img.py
@@ -244,7 +244,7 @@ def main():
                         c = model.get_learned_conditioning(prompts)

                         # encode (scaled latent)
-                        z_enc = sampler.stochastic_encode(init_latent, torch.tensor([t_enc] * batch_size).to(device))
+                        z_enc = sampler.stochastic_encode(init_latent, torch.tensor([t_enc - 1] * batch_size).to(device))
                         # decode it
                         samples = sampler.decode(z_enc, c, t_enc, unconditional_guidance_scale=opt.scale,
                                                  unconditional_conditioning=uc, )

diff --git a/scripts/streamlit/depth2img.py b/scripts/streamlit/depth2img.py
index 7f802234..a7c845ae 100644
--- a/scripts/streamlit/depth2img.py
+++ b/scripts/streamlit/depth2img.py
@@ -93,7 +93,7 @@ def paint(sampler, image, prompt, t_enc, seed, scale, num_samples=1, callback=No
         uc_full = {"c_concat": [c_cat], "c_crossattn": [uc_cross]}
         if not do_full_sample:
             # encode (scaled latent)
-            z_enc = sampler.stochastic_encode(z, torch.tensor([t_enc] * num_samples).to(model.device))
+            z_enc = sampler.stochastic_encode(z, torch.tensor([t_enc - 1] * num_samples).to(model.device))
         else:
             z_enc = torch.randn_like(z)
         # decode it
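
Why `t_enc - 1`: in these scripts `t_enc` is derived from the noising strength (e.g. `t_enc = int(strength * ddim_steps)` in scripts/img2img.py), and `sampler.stochastic_encode` uses its second argument as an index into the DDIM alpha schedule, which has `ddim_steps` entries and therefore valid indices 0 .. ddim_steps - 1. At strength 1.0, passing `t_enc` reaches one past the end of the schedule, while `t_enc - 1` selects the noise level of the last DDIM step that `sampler.decode(z_enc, c, t_enc, ...)` actually runs. The sketch below illustrates that off-by-one under stated assumptions; the stand-in `ddim_alphas` values and the simplified `stochastic_encode` body are illustrative only, not the repository's code.

    # Minimal sketch of the indexing fixed by this patch (assumptions noted above).
    import torch

    ddim_steps = 50                         # number of DDIM sampling steps
    strength = 1.0                          # img2img noising strength
    t_enc = int(strength * ddim_steps)      # == 50 at full strength

    # Stand-in for the sampler's per-step alpha_cumprod schedule:
    # one entry per DDIM step, so valid indices are 0 .. ddim_steps - 1.
    ddim_alphas = torch.linspace(0.9991, 0.0047, ddim_steps)

    def stochastic_encode(x0, t_index, alphas):
        # t_index is an *index* into the schedule, not a raw diffusion timestep.
        a = alphas[t_index]                 # IndexError if t_index == len(alphas)
        noise = torch.randn_like(x0)
        return a.sqrt() * x0 + (1.0 - a).sqrt() * noise

    x0 = torch.randn(1, 4, 64, 64)          # dummy latent
    # Passing t_enc at strength 1.0 would index past the schedule;
    # t_enc - 1 matches the last step the decode loop will run.
    z_enc = stochastic_encode(x0, t_enc - 1, ddim_alphas)
    print(z_enc.shape)                      # torch.Size([1, 4, 64, 64])

With `t_enc - 1`, the amount of noise added to the initial latent lines up with the first timestep the decode loop denoises from, so full-strength img2img/depth2img no longer uses an off-by-one noise level.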