PE-Core-G14-448
298.8K
18
license:apache-2.0
by
facebook
Image Model
OTHER
Good
299K downloads
Production-ready
Edge AI:
Mobile
Laptop
Server
Unknown
Mobile
Laptop
Server
Quick Summary
--- license: apache-2.
Code Examples
Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Image and Text Feature extraction with a Trained Modelpythonpytorch
import torch
from PIL import Image
import core.vision_encoder.pe as pe
import core.vision_encoder.transforms as transforms
print("CLIP configs:", pe.CLIP.available_configs())
# CLIP configs: ['PE-Core-G14-448', 'PE-Core-L14-336', 'PE-Core-B16-224']
model = pe.CLIP.from_config("PE-Core-G14-448", pretrained=True) # Downloads from HF
model = model.cuda()
preprocess = transforms.get_image_transform(model.image_size)
tokenizer = transforms.get_text_tokenizer(model.context_length)
image = preprocess(Image.open("docs/assets/cat.png")).unsqueeze(0).cuda()
text = tokenizer(["a diagram", "a dog", "a cat"]).cuda()
with torch.no_grad(), torch.autocast("cuda"):
image_features, text_features, logit_scale = model(image, text)
text_probs = (logit_scale * image_features @ text_features.T).softmax(dim=-1)
print("Label probs:", text_probs) # prints: [[0.0, 0.0, 1.0]]Deploy This Model
Production-ready deployment in minutes
Together.ai
Instant API access to this model
Production-ready inference API. Start free, scale to millions.
Try Free APIReplicate
One-click model deployment
Run models in the cloud with simple API. No DevOps required.
Deploy NowDisclosure: We may earn a commission from these partners. This helps keep LLMYourWay free.