MingTok Vision
729
30
2 languages
license:apache-2.0
by
inclusionAI
Image Model
OTHER
New
729 downloads
Early-stage
Edge AI:
Mobile
Laptop
Server
Unknown
Mobile
Laptop
Server
Quick Summary
MingTok: A Unified Tokenizer for Visual Understanding and Generation without Vector Quantization. Technical Report | Project Page | Hugging Face | M...
Code Examples
Usage (python)
# Reconstruct an image with MingTok-Vision: load the model, run the image
# through the encoder/decoder, and convert the result back to a PIL image.
# The script moves the model and tensors to CUDA, so a GPU is required.
import torch
import torchvision.transforms as T  # provides T.ToPILImage used below
from PIL import Image

# build MingTok
from mingtok.modeling_mingtok import MingTok
# NOTE(review): CenterCropProcessor is used below but is not imported anywhere
# in this snippet; it presumably ships with the mingtok package -- confirm the
# module path and add the import.

mingtok_model = MingTok.from_pretrained("inclusionAI/MingTok-Vision")
mingtok_model = mingtok_model.cuda()

img_path = "mingtok/asset/mingtok.png"
save_path = "mingtok/asset/mingtok_recon.png"

# loading original image
image = Image.open(img_path).convert("RGB")
# The same per-channel mean/std (0.5) is used further down to undo the
# normalization applied here.
processor = CenterCropProcessor(image_size=512, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
# unsqueeze(0) adds a leading dimension (presumably the batch axis).
image = processor(image).cuda().unsqueeze(0)

# performing reconstruction
with torch.no_grad():
    image_recon = mingtok_model.forward_enc_dec(image)
    # Alternative kept from the original snippet.
    # NOTE(review): this looks like the same pipeline run stage by stage -- confirm.
    # latent = mingtok_model.low_level_encoder(image)
    # semantic_feat = mingtok_model.semantic_decoder(latent)['x_norm_patchtokens']
    # image_recon = mingtok_model.forward_pixel_decoder(semantic_feat)

# Undo the normalization: view(1, -1, 1, 1) lets the per-channel values
# broadcast over a 4-D tensor.
output_mean = torch.tensor([0.5, 0.5, 0.5]).view(1, -1, 1, 1).cuda()
output_std = torch.tensor([0.5, 0.5, 0.5]).view(1, -1, 1, 1).cuda()
# [0] selects the first (only) image. Clamp to [0, 1] so values that slightly
# over- or undershoot do not wrap around when converted to 8-bit pixels.
output_image = (image_recon * output_std + output_mean)[0].clamp(0, 1)
output_image = T.ToPILImage()(output_image)
output_image.save(save_path)
Usage (python)
# Reconstruct an image with MingTok-Vision: load the model, run the image
# through the encoder/decoder, and convert the result back to a PIL image.
# The script moves the model and tensors to CUDA, so a GPU is required.
import torch
import torchvision.transforms as T  # provides T.ToPILImage used below
from PIL import Image

# build MingTok
from mingtok.modeling_mingtok import MingTok
# NOTE(review): CenterCropProcessor is used below but is not imported anywhere
# in this snippet; it presumably ships with the mingtok package -- confirm the
# module path and add the import.

mingtok_model = MingTok.from_pretrained("inclusionAI/MingTok-Vision")
mingtok_model = mingtok_model.cuda()

img_path = "mingtok/asset/mingtok.png"
save_path = "mingtok/asset/mingtok_recon.png"

# loading original image
image = Image.open(img_path).convert("RGB")
# The same per-channel mean/std (0.5) is used further down to undo the
# normalization applied here.
processor = CenterCropProcessor(image_size=512, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
# unsqueeze(0) adds a leading dimension (presumably the batch axis).
image = processor(image).cuda().unsqueeze(0)

# performing reconstruction
with torch.no_grad():
    image_recon = mingtok_model.forward_enc_dec(image)
    # Alternative kept from the original snippet.
    # NOTE(review): this looks like the same pipeline run stage by stage -- confirm.
    # latent = mingtok_model.low_level_encoder(image)
    # semantic_feat = mingtok_model.semantic_decoder(latent)['x_norm_patchtokens']
    # image_recon = mingtok_model.forward_pixel_decoder(semantic_feat)

# Undo the normalization: view(1, -1, 1, 1) lets the per-channel values
# broadcast over a 4-D tensor.
output_mean = torch.tensor([0.5, 0.5, 0.5]).view(1, -1, 1, 1).cuda()
output_std = torch.tensor([0.5, 0.5, 0.5]).view(1, -1, 1, 1).cuda()
# [0] selects the first (only) image. Clamp to [0, 1] so values that slightly
# over- or undershoot do not wrap around when converted to 8-bit pixels.
output_image = (image_recon * output_std + output_mean)[0].clamp(0, 1)
output_image = T.ToPILImage()(output_image)
output_image.save(save_path)
Usage (python)
# Reconstruct an image with MingTok-Vision: load the model, run the image
# through the encoder/decoder, and convert the result back to a PIL image.
# The script moves the model and tensors to CUDA, so a GPU is required.
import torch
import torchvision.transforms as T  # provides T.ToPILImage used below
from PIL import Image

# build MingTok
from mingtok.modeling_mingtok import MingTok
# NOTE(review): CenterCropProcessor is used below but is not imported anywhere
# in this snippet; it presumably ships with the mingtok package -- confirm the
# module path and add the import.

mingtok_model = MingTok.from_pretrained("inclusionAI/MingTok-Vision")
mingtok_model = mingtok_model.cuda()

img_path = "mingtok/asset/mingtok.png"
save_path = "mingtok/asset/mingtok_recon.png"

# loading original image
image = Image.open(img_path).convert("RGB")
# The same per-channel mean/std (0.5) is used further down to undo the
# normalization applied here.
processor = CenterCropProcessor(image_size=512, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
# unsqueeze(0) adds a leading dimension (presumably the batch axis).
image = processor(image).cuda().unsqueeze(0)

# performing reconstruction
with torch.no_grad():
    image_recon = mingtok_model.forward_enc_dec(image)
    # Alternative kept from the original snippet.
    # NOTE(review): this looks like the same pipeline run stage by stage -- confirm.
    # latent = mingtok_model.low_level_encoder(image)
    # semantic_feat = mingtok_model.semantic_decoder(latent)['x_norm_patchtokens']
    # image_recon = mingtok_model.forward_pixel_decoder(semantic_feat)

# Undo the normalization: view(1, -1, 1, 1) lets the per-channel values
# broadcast over a 4-D tensor.
output_mean = torch.tensor([0.5, 0.5, 0.5]).view(1, -1, 1, 1).cuda()
output_std = torch.tensor([0.5, 0.5, 0.5]).view(1, -1, 1, 1).cuda()
# [0] selects the first (only) image. Clamp to [0, 1] so values that slightly
# over- or undershoot do not wrap around when converted to 8-bit pixels.
output_image = (image_recon * output_std + output_mean)[0].clamp(0, 1)
output_image = T.ToPILImage()(output_image)
output_image.save(save_path)
Usage (python)
# Reconstruct an image with MingTok-Vision: load the model, run the image
# through the encoder/decoder, and convert the result back to a PIL image.
# The script moves the model and tensors to CUDA, so a GPU is required.
import torch
import torchvision.transforms as T  # provides T.ToPILImage used below
from PIL import Image

# build MingTok
from mingtok.modeling_mingtok import MingTok
# NOTE(review): CenterCropProcessor is used below but is not imported anywhere
# in this snippet; it presumably ships with the mingtok package -- confirm the
# module path and add the import.

mingtok_model = MingTok.from_pretrained("inclusionAI/MingTok-Vision")
mingtok_model = mingtok_model.cuda()

img_path = "mingtok/asset/mingtok.png"
save_path = "mingtok/asset/mingtok_recon.png"

# loading original image
image = Image.open(img_path).convert("RGB")
# The same per-channel mean/std (0.5) is used further down to undo the
# normalization applied here.
processor = CenterCropProcessor(image_size=512, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
# unsqueeze(0) adds a leading dimension (presumably the batch axis).
image = processor(image).cuda().unsqueeze(0)

# performing reconstruction
with torch.no_grad():
    image_recon = mingtok_model.forward_enc_dec(image)
    # Alternative kept from the original snippet.
    # NOTE(review): this looks like the same pipeline run stage by stage -- confirm.
    # latent = mingtok_model.low_level_encoder(image)
    # semantic_feat = mingtok_model.semantic_decoder(latent)['x_norm_patchtokens']
    # image_recon = mingtok_model.forward_pixel_decoder(semantic_feat)

# Undo the normalization: view(1, -1, 1, 1) lets the per-channel values
# broadcast over a 4-D tensor.
output_mean = torch.tensor([0.5, 0.5, 0.5]).view(1, -1, 1, 1).cuda()
output_std = torch.tensor([0.5, 0.5, 0.5]).view(1, -1, 1, 1).cuda()
# [0] selects the first (only) image. Clamp to [0, 1] so values that slightly
# over- or undershoot do not wrap around when converted to 8-bit pixels.
output_image = (image_recon * output_std + output_mean)[0].clamp(0, 1)
output_image = T.ToPILImage()(output_image)
output_image.save(save_path)
Usage (python)
# Reconstruct an image with MingTok-Vision: load the model, run the image
# through the encoder/decoder, and convert the result back to a PIL image.
# The script moves the model and tensors to CUDA, so a GPU is required.
import torch
import torchvision.transforms as T  # provides T.ToPILImage used below
from PIL import Image

# build MingTok
from mingtok.modeling_mingtok import MingTok
# NOTE(review): CenterCropProcessor is used below but is not imported anywhere
# in this snippet; it presumably ships with the mingtok package -- confirm the
# module path and add the import.

mingtok_model = MingTok.from_pretrained("inclusionAI/MingTok-Vision")
mingtok_model = mingtok_model.cuda()

img_path = "mingtok/asset/mingtok.png"
save_path = "mingtok/asset/mingtok_recon.png"

# loading original image
image = Image.open(img_path).convert("RGB")
# The same per-channel mean/std (0.5) is used further down to undo the
# normalization applied here.
processor = CenterCropProcessor(image_size=512, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
# unsqueeze(0) adds a leading dimension (presumably the batch axis).
image = processor(image).cuda().unsqueeze(0)

# performing reconstruction
with torch.no_grad():
    image_recon = mingtok_model.forward_enc_dec(image)
    # Alternative kept from the original snippet.
    # NOTE(review): this looks like the same pipeline run stage by stage -- confirm.
    # latent = mingtok_model.low_level_encoder(image)
    # semantic_feat = mingtok_model.semantic_decoder(latent)['x_norm_patchtokens']
    # image_recon = mingtok_model.forward_pixel_decoder(semantic_feat)

# Undo the normalization: view(1, -1, 1, 1) lets the per-channel values
# broadcast over a 4-D tensor.
output_mean = torch.tensor([0.5, 0.5, 0.5]).view(1, -1, 1, 1).cuda()
output_std = torch.tensor([0.5, 0.5, 0.5]).view(1, -1, 1, 1).cuda()
# [0] selects the first (only) image. Clamp to [0, 1] so values that slightly
# over- or undershoot do not wrap around when converted to 8-bit pixels.
output_image = (image_recon * output_std + output_mean)[0].clamp(0, 1)
output_image = T.ToPILImage()(output_image)
output_image.save(save_path)
Usage (python)
# Reconstruct an image with MingTok-Vision: load the model, run the image
# through the encoder/decoder, and convert the result back to a PIL image.
# The script moves the model and tensors to CUDA, so a GPU is required.
import torch
import torchvision.transforms as T  # provides T.ToPILImage used below
from PIL import Image

# build MingTok
from mingtok.modeling_mingtok import MingTok
# NOTE(review): CenterCropProcessor is used below but is not imported anywhere
# in this snippet; it presumably ships with the mingtok package -- confirm the
# module path and add the import.

mingtok_model = MingTok.from_pretrained("inclusionAI/MingTok-Vision")
mingtok_model = mingtok_model.cuda()

img_path = "mingtok/asset/mingtok.png"
save_path = "mingtok/asset/mingtok_recon.png"

# loading original image
image = Image.open(img_path).convert("RGB")
# The same per-channel mean/std (0.5) is used further down to undo the
# normalization applied here.
processor = CenterCropProcessor(image_size=512, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
# unsqueeze(0) adds a leading dimension (presumably the batch axis).
image = processor(image).cuda().unsqueeze(0)

# performing reconstruction
with torch.no_grad():
    image_recon = mingtok_model.forward_enc_dec(image)
    # Alternative kept from the original snippet.
    # NOTE(review): this looks like the same pipeline run stage by stage -- confirm.
    # latent = mingtok_model.low_level_encoder(image)
    # semantic_feat = mingtok_model.semantic_decoder(latent)['x_norm_patchtokens']
    # image_recon = mingtok_model.forward_pixel_decoder(semantic_feat)

# Undo the normalization: view(1, -1, 1, 1) lets the per-channel values
# broadcast over a 4-D tensor.
output_mean = torch.tensor([0.5, 0.5, 0.5]).view(1, -1, 1, 1).cuda()
output_std = torch.tensor([0.5, 0.5, 0.5]).view(1, -1, 1, 1).cuda()
# [0] selects the first (only) image. Clamp to [0, 1] so values that slightly
# over- or undershoot do not wrap around when converted to 8-bit pixels.
output_image = (image_recon * output_std + output_mean)[0].clamp(0, 1)
output_image = T.ToPILImage()(output_image)
output_image.save(save_path)
Deploy This Model
Production-ready deployment in minutes
Together.ai
Instant API access to this model
Production-ready inference API. Start free, scale to millions.
Try Free API
Replicate
One-click model deployment
Run models in the cloud with simple API. No DevOps required.
Deploy Now
Disclosure: We may earn a commission from these partners. This helps keep LLMYourWay free.