This is a fork of Open-MAGVIT2 (An Open-source Project Toward Democratizing Auto-Regressive Visual Generation), repackaged so it can be installed and used as a regular Python package.
Install the package:

```bash
pip install open-magvit2
```

Download the ImageNet 256 tokenizer checkpoint:

```bash
wget https://huggingface.co/TencentARC/Open-MAGVIT2/resolve/main/imagenet_256_L.ckpt
```
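If you prefer to stay in Python, the same file can be fetched with the `huggingface_hub` client (a sketch; `huggingface_hub` is assumed to be installed separately and is not a dependency of this package):

```python
# Sketch: download the same checkpoint through the Hugging Face Hub client
# instead of wget (assumes `pip install huggingface_hub`).
from huggingface_hub import hf_hub_download

ckpt_path = hf_hub_download(
    repo_id="TencentARC/Open-MAGVIT2",  # repository hosting the tokenizer weights
    filename="imagenet_256_L.ckpt",     # same file as the wget command above
)
print(ckpt_path)  # local path of the cached checkpoint
```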
Load the tokenizer with the config bundled in the package:

```python
import pkg_resources
import torch
from omegaconf import OmegaConf

from open_magvit2.reconstruct import load_vqgan_new

DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# The packaged config matches the imagenet_256_L checkpoint downloaded above.
config_path = pkg_resources.resource_filename('open_magvit2', 'configs/gpu/imagenet_lfqgan_256_L.yaml')
config = OmegaConf.load(config_path)
model = load_vqgan_new(config, "imagenet_256_L.ckpt").to(DEVICE)
```
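As a quick sanity check that the weights loaded (a sketch; nothing here is specific to open-magvit2 beyond the `model` object created above):

```python
# Optional sanity check: confirm the model is in eval mode on the expected device.
model.eval()  # harmless if load_vqgan_new already returns the model in eval mode
n_params = sum(p.numel() for p in model.parameters())
print(f"tokenizer loaded with {n_params / 1e6:.1f}M parameters on {DEVICE}")
```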
Load an image and prepare a batch:

```python
from PIL import Image
import torchvision.transforms as transforms

image = Image.open('1165.jpg')
image_tensor = transforms.ToTensor()(image)    # CHW float tensor in [0, 1]
batch = image_tensor.unsqueeze(0).to(DEVICE)   # add batch dimension and move to the model's device
```
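The decode-from-tokens example further down assumes a 16x16 token grid, which corresponds to a 256x256 input with this config. If your image has a different size, resizing and center-cropping first keeps the shapes consistent (an illustrative preprocessing choice, not something the package enforces):

```python
# Illustrative preprocessing: force a 256x256 input so the encoder yields the
# 16x16 token grid assumed by the decode-from-tokens example below.
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(256),
    transforms.ToTensor(),  # same [0, 1] scaling as the snippet above
])
batch = preprocess(image).unsqueeze(0).to(DEVICE)
```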
Encode the batch into quantized embeddings and token ids:

```python
with torch.no_grad():
    quant, emb_loss, tokens, loss_breakdown = model.encode(batch)
```
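The shapes below are what you can expect for a 256x256 input with this config (a sketch for inspection only; exact sizes depend on the input resolution):

```python
# Inspect the encoder outputs: a 256x256 image maps to a 16x16 grid of token ids,
# each indexing the 2^18-entry LFQ codebook of the imagenet_256_L model.
print(quant.shape)    # quantized embeddings, e.g. torch.Size([1, 18, 16, 16])
print(tokens.shape)   # flattened token ids,  e.g. torch.Size([256])
print(int(tokens.min()), int(tokens.max()))   # ids lie in [0, 2**18 - 1]
```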
Decode back to an image from the quantized embeddings:

```python
from open_magvit2.reconstruct import custom_to_pil

with torch.no_grad():
    tensor = model.decode(quant)
reconstructed_image = custom_to_pil(tensor[0])
```
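For example, to save the result next to the input (file name is just an example):

```python
# Save the reconstruction for a quick side-by-side comparison with 1165.jpg.
reconstructed_image.save("1165_reconstructed.png")
```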
Decode from the tokens (i.e. the ids):

```python
from einops import rearrange
from open_magvit2.reconstruct import custom_to_pil

# Restore the batch dimension: tokens come back flattened as (b * s,).
x = rearrange(tokens, "(b s) -> b s", b=1)
# Look the ids up in the codebook; the shape is (batch, height, width, latent channels),
# here the 16x16 token grid of a 256x256 input with the 18-bit LFQ codebook.
q = model.quantize.get_codebook_entry(x, (1, 16, 16, 18), order='')

with torch.no_grad():
    tensor2 = model.decode(q)
reconstructed_image2 = custom_to_pil(tensor2[0])
```
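Both paths decode the same quantized representation, so the two reconstructions should match. A sketch of a quick check (assuming both snippets above were run on the same input):

```python
import numpy as np

# The embedding path and the token path should produce (near-)identical images.
a = np.asarray(reconstructed_image, dtype=np.int16)
b = np.asarray(reconstructed_image2, dtype=np.int16)
print("max per-pixel difference:", int(np.abs(a - b).max()))
```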
For a complete, runnable walkthrough, see the notebook open-MAGVIT2-package-inference-example.ipynb.