MingTok Vision

729
30
2 languages
license:apache-2.0
by
inclusionAI
Image Model
OTHER
New
729 downloads
Early-stage
Edge AI:
Mobile
Laptop
Server
Unknown
Mobile
Laptop
Server
Quick Summary

MingTok: A Unified Tokenizer for Visual Understanding and Generation without Vector Quantization 📑 Technical Report | 📖 Project Page | 🤗 Hugging Face | 🤖 M...

Code Examples

Usage (python)
# Round-trip an image through MingTok (forward_enc_dec) and save the
# reconstruction next to the original asset.

import torch
import torchvision.transforms as T
from PIL import Image

from mingtok.modeling_mingtok import MingTok
# NOTE(review): the module path for CenterCropProcessor is assumed --
# confirm against the installed mingtok package.
from mingtok.utils import CenterCropProcessor

# build MingTok
mingtok_model = MingTok.from_pretrained("inclusionAI/MingTok-Vision")
mingtok_model = mingtok_model.cuda()

img_path = "mingtok/asset/mingtok.png"
save_path = "mingtok/asset/mingtok_recon.png"

# Normalization statistics, shared by pre-processing and de-normalization so
# the two cannot drift apart.
norm_mean = [0.5, 0.5, 0.5]
norm_std = [0.5, 0.5, 0.5]

# loading original image
image = Image.open(img_path).convert("RGB")
processor = CenterCropProcessor(image_size=512, mean=norm_mean, std=norm_std)
image = processor(image).cuda().unsqueeze(0)  # unsqueeze adds a leading dim

# performing reconstruction
with torch.no_grad():
    image_recon = mingtok_model.forward_enc_dec(image)
    # Staged alternative from the upstream example (presumably equivalent to
    # forward_enc_dec -- confirm before relying on it):
    # latent = mingtok_model.low_level_encoder(image)
    # semantic_feat = mingtok_model.semantic_decoder(latent)['x_norm_patchtokens']
    # image_recon = mingtok_model.forward_pixel_decoder(semantic_feat)

# Undo the normalization; the stats are reshaped to (1, C, 1, 1) so they
# broadcast against the reconstruction, and created on its device.
output_mean = torch.tensor(norm_mean, device=image_recon.device).view(1, -1, 1, 1)
output_std = torch.tensor(norm_std, device=image_recon.device).view(1, -1, 1, 1)
output_image = (image_recon * output_std + output_mean)[0]

# ToPILImage scales float tensors by 255 and casts to uint8, so values
# outside [0, 1] would overflow; clamp before converting.
output_image = T.ToPILImage()(output_image.clamp(0, 1))
output_image.save(save_path)
Usage (python)
# Round-trip an image through MingTok (forward_enc_dec) and save the
# reconstruction next to the original asset.

import torch
import torchvision.transforms as T
from PIL import Image

from mingtok.modeling_mingtok import MingTok
# NOTE(review): the module path for CenterCropProcessor is assumed --
# confirm against the installed mingtok package.
from mingtok.utils import CenterCropProcessor

# build MingTok
mingtok_model = MingTok.from_pretrained("inclusionAI/MingTok-Vision")
mingtok_model = mingtok_model.cuda()

img_path = "mingtok/asset/mingtok.png"
save_path = "mingtok/asset/mingtok_recon.png"

# Normalization statistics, shared by pre-processing and de-normalization so
# the two cannot drift apart.
norm_mean = [0.5, 0.5, 0.5]
norm_std = [0.5, 0.5, 0.5]

# loading original image
image = Image.open(img_path).convert("RGB")
processor = CenterCropProcessor(image_size=512, mean=norm_mean, std=norm_std)
image = processor(image).cuda().unsqueeze(0)  # unsqueeze adds a leading dim

# performing reconstruction
with torch.no_grad():
    image_recon = mingtok_model.forward_enc_dec(image)
    # Staged alternative from the upstream example (presumably equivalent to
    # forward_enc_dec -- confirm before relying on it):
    # latent = mingtok_model.low_level_encoder(image)
    # semantic_feat = mingtok_model.semantic_decoder(latent)['x_norm_patchtokens']
    # image_recon = mingtok_model.forward_pixel_decoder(semantic_feat)

# Undo the normalization; the stats are reshaped to (1, C, 1, 1) so they
# broadcast against the reconstruction, and created on its device.
output_mean = torch.tensor(norm_mean, device=image_recon.device).view(1, -1, 1, 1)
output_std = torch.tensor(norm_std, device=image_recon.device).view(1, -1, 1, 1)
output_image = (image_recon * output_std + output_mean)[0]

# ToPILImage scales float tensors by 255 and casts to uint8, so values
# outside [0, 1] would overflow; clamp before converting.
output_image = T.ToPILImage()(output_image.clamp(0, 1))
output_image.save(save_path)
Usage (python)
# Round-trip an image through MingTok (forward_enc_dec) and save the
# reconstruction next to the original asset.

import torch
import torchvision.transforms as T
from PIL import Image

from mingtok.modeling_mingtok import MingTok
# NOTE(review): the module path for CenterCropProcessor is assumed --
# confirm against the installed mingtok package.
from mingtok.utils import CenterCropProcessor

# build MingTok
mingtok_model = MingTok.from_pretrained("inclusionAI/MingTok-Vision")
mingtok_model = mingtok_model.cuda()

img_path = "mingtok/asset/mingtok.png"
save_path = "mingtok/asset/mingtok_recon.png"

# Normalization statistics, shared by pre-processing and de-normalization so
# the two cannot drift apart.
norm_mean = [0.5, 0.5, 0.5]
norm_std = [0.5, 0.5, 0.5]

# loading original image
image = Image.open(img_path).convert("RGB")
processor = CenterCropProcessor(image_size=512, mean=norm_mean, std=norm_std)
image = processor(image).cuda().unsqueeze(0)  # unsqueeze adds a leading dim

# performing reconstruction
with torch.no_grad():
    image_recon = mingtok_model.forward_enc_dec(image)
    # Staged alternative from the upstream example (presumably equivalent to
    # forward_enc_dec -- confirm before relying on it):
    # latent = mingtok_model.low_level_encoder(image)
    # semantic_feat = mingtok_model.semantic_decoder(latent)['x_norm_patchtokens']
    # image_recon = mingtok_model.forward_pixel_decoder(semantic_feat)

# Undo the normalization; the stats are reshaped to (1, C, 1, 1) so they
# broadcast against the reconstruction, and created on its device.
output_mean = torch.tensor(norm_mean, device=image_recon.device).view(1, -1, 1, 1)
output_std = torch.tensor(norm_std, device=image_recon.device).view(1, -1, 1, 1)
output_image = (image_recon * output_std + output_mean)[0]

# ToPILImage scales float tensors by 255 and casts to uint8, so values
# outside [0, 1] would overflow; clamp before converting.
output_image = T.ToPILImage()(output_image.clamp(0, 1))
output_image.save(save_path)
Usage (python)
# Round-trip an image through MingTok (forward_enc_dec) and save the
# reconstruction next to the original asset.

import torch
import torchvision.transforms as T
from PIL import Image

from mingtok.modeling_mingtok import MingTok
# NOTE(review): the module path for CenterCropProcessor is assumed --
# confirm against the installed mingtok package.
from mingtok.utils import CenterCropProcessor

# build MingTok
mingtok_model = MingTok.from_pretrained("inclusionAI/MingTok-Vision")
mingtok_model = mingtok_model.cuda()

img_path = "mingtok/asset/mingtok.png"
save_path = "mingtok/asset/mingtok_recon.png"

# Normalization statistics, shared by pre-processing and de-normalization so
# the two cannot drift apart.
norm_mean = [0.5, 0.5, 0.5]
norm_std = [0.5, 0.5, 0.5]

# loading original image
image = Image.open(img_path).convert("RGB")
processor = CenterCropProcessor(image_size=512, mean=norm_mean, std=norm_std)
image = processor(image).cuda().unsqueeze(0)  # unsqueeze adds a leading dim

# performing reconstruction
with torch.no_grad():
    image_recon = mingtok_model.forward_enc_dec(image)
    # Staged alternative from the upstream example (presumably equivalent to
    # forward_enc_dec -- confirm before relying on it):
    # latent = mingtok_model.low_level_encoder(image)
    # semantic_feat = mingtok_model.semantic_decoder(latent)['x_norm_patchtokens']
    # image_recon = mingtok_model.forward_pixel_decoder(semantic_feat)

# Undo the normalization; the stats are reshaped to (1, C, 1, 1) so they
# broadcast against the reconstruction, and created on its device.
output_mean = torch.tensor(norm_mean, device=image_recon.device).view(1, -1, 1, 1)
output_std = torch.tensor(norm_std, device=image_recon.device).view(1, -1, 1, 1)
output_image = (image_recon * output_std + output_mean)[0]

# ToPILImage scales float tensors by 255 and casts to uint8, so values
# outside [0, 1] would overflow; clamp before converting.
output_image = T.ToPILImage()(output_image.clamp(0, 1))
output_image.save(save_path)
Usage (python)
# Round-trip an image through MingTok (forward_enc_dec) and save the
# reconstruction next to the original asset.

import torch
import torchvision.transforms as T
from PIL import Image

from mingtok.modeling_mingtok import MingTok
# NOTE(review): the module path for CenterCropProcessor is assumed --
# confirm against the installed mingtok package.
from mingtok.utils import CenterCropProcessor

# build MingTok
mingtok_model = MingTok.from_pretrained("inclusionAI/MingTok-Vision")
mingtok_model = mingtok_model.cuda()

img_path = "mingtok/asset/mingtok.png"
save_path = "mingtok/asset/mingtok_recon.png"

# Normalization statistics, shared by pre-processing and de-normalization so
# the two cannot drift apart.
norm_mean = [0.5, 0.5, 0.5]
norm_std = [0.5, 0.5, 0.5]

# loading original image
image = Image.open(img_path).convert("RGB")
processor = CenterCropProcessor(image_size=512, mean=norm_mean, std=norm_std)
image = processor(image).cuda().unsqueeze(0)  # unsqueeze adds a leading dim

# performing reconstruction
with torch.no_grad():
    image_recon = mingtok_model.forward_enc_dec(image)
    # Staged alternative from the upstream example (presumably equivalent to
    # forward_enc_dec -- confirm before relying on it):
    # latent = mingtok_model.low_level_encoder(image)
    # semantic_feat = mingtok_model.semantic_decoder(latent)['x_norm_patchtokens']
    # image_recon = mingtok_model.forward_pixel_decoder(semantic_feat)

# Undo the normalization; the stats are reshaped to (1, C, 1, 1) so they
# broadcast against the reconstruction, and created on its device.
output_mean = torch.tensor(norm_mean, device=image_recon.device).view(1, -1, 1, 1)
output_std = torch.tensor(norm_std, device=image_recon.device).view(1, -1, 1, 1)
output_image = (image_recon * output_std + output_mean)[0]

# ToPILImage scales float tensors by 255 and casts to uint8, so values
# outside [0, 1] would overflow; clamp before converting.
output_image = T.ToPILImage()(output_image.clamp(0, 1))
output_image.save(save_path)
Usage (python)
# Round-trip an image through MingTok (forward_enc_dec) and save the
# reconstruction next to the original asset.

import torch
import torchvision.transforms as T
from PIL import Image

from mingtok.modeling_mingtok import MingTok
# NOTE(review): the module path for CenterCropProcessor is assumed --
# confirm against the installed mingtok package.
from mingtok.utils import CenterCropProcessor

# build MingTok
mingtok_model = MingTok.from_pretrained("inclusionAI/MingTok-Vision")
mingtok_model = mingtok_model.cuda()

img_path = "mingtok/asset/mingtok.png"
save_path = "mingtok/asset/mingtok_recon.png"

# Normalization statistics, shared by pre-processing and de-normalization so
# the two cannot drift apart.
norm_mean = [0.5, 0.5, 0.5]
norm_std = [0.5, 0.5, 0.5]

# loading original image
image = Image.open(img_path).convert("RGB")
processor = CenterCropProcessor(image_size=512, mean=norm_mean, std=norm_std)
image = processor(image).cuda().unsqueeze(0)  # unsqueeze adds a leading dim

# performing reconstruction
with torch.no_grad():
    image_recon = mingtok_model.forward_enc_dec(image)
    # Staged alternative from the upstream example (presumably equivalent to
    # forward_enc_dec -- confirm before relying on it):
    # latent = mingtok_model.low_level_encoder(image)
    # semantic_feat = mingtok_model.semantic_decoder(latent)['x_norm_patchtokens']
    # image_recon = mingtok_model.forward_pixel_decoder(semantic_feat)

# Undo the normalization; the stats are reshaped to (1, C, 1, 1) so they
# broadcast against the reconstruction, and created on its device.
output_mean = torch.tensor(norm_mean, device=image_recon.device).view(1, -1, 1, 1)
output_std = torch.tensor(norm_std, device=image_recon.device).view(1, -1, 1, 1)
output_image = (image_recon * output_std + output_mean)[0]

# ToPILImage scales float tensors by 255 and casts to uint8, so values
# outside [0, 1] would overflow; clamp before converting.
output_image = T.ToPILImage()(output_image.clamp(0, 1))
output_image.save(save_path)

Deploy This Model

Production-ready deployment in minutes

Together.ai

Instant API access to this model

Fastest API

Production-ready inference API. Start free, scale to millions.

Try Free API

Replicate

One-click model deployment

Easiest Setup

Run models in the cloud with simple API. No DevOps required.

Deploy Now

Disclosure: We may earn a commission from these partners. This helps keep LLMYourWay free.