xVLM2Vec_image_loss
5 languages · by swap-uniba
Code Model · License: OTHER · New · 3 downloads · Early-stage
Edge AI readiness (Mobile / Laptop / Server): Unknown
Quick Summary
xVLM2Vec_image_loss is a Large Vision-Language Model (LVLM) based embedding model, aligned from TIGER-Lab/VLM2Vec-LoRA.
Code Examples
Clone the repository, which provides the `src.mmeb_src` package used in the Python example below:

```text
git clone https://github.com/swapUniba/xVLM2Vec
cd xVLM2Vec
```
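The example imports torch, transformers, Pillow, and requests, so those packages need to be installed. A minimal install sketch, assuming a CUDA-capable PyTorch build; check the repository's own requirements file, if present, for exact pinned versions:

```text
pip install torch transformers pillow requests
```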
Embed an image query and score candidate captions against it:

```python
from src.mmeb_src.model import MMEBModel
from src.mmeb_src.arguments import ModelArguments
from PIL import Image
from transformers import AutoProcessor
import torch
import requests

# Load the xVLM2Vec checkpoint on top of the Phi-3.5-vision backbone.
model_args = ModelArguments(
    model_name='microsoft/Phi-3.5-vision-instruct',
    checkpoint_path="m-elio/xVLM2Vec_image_loss",
    pooling='last',
    normalize=True,
    lora=False,
)

processor = AutoProcessor.from_pretrained(
    "microsoft/Phi-3.5-vision-instruct",
    trust_remote_code=True,
    num_crops=4,
)

model = MMEBModel.load(model_args)
model.eval()
model = model.to('cuda', dtype=torch.bfloat16)

with torch.no_grad():
    # Query: a COCO image plus an Italian instruction
    # ("Find a caption that describes the everyday image").
    inputs = processor(
        "<|image_1|>\nTrova una didascalia che descriva l'immagine di tutti i giorni",
        [Image.open(requests.get("http://images.cocodataset.org/train2017/000000514915.jpg", stream=True).raw)],
    )
    inputs = {key: value.to('cuda') for key, value in inputs.items()}
    qry_output = model(qry=inputs)["qry_reps"]

    # Targets: two Italian candidate captions
    # ("A dog lying on the floor", "A cat lying on the floor").
    strings = ['Un cane steso sul pavimento', 'Un gatto steso sul pavimento']
    inputs = processor(strings)
    inputs = {key: value.to('cuda') for key, value in inputs.items()}
    tgt_output = model(tgt=inputs)["tgt_reps"]

    # Cosine similarity between the query and each candidate caption.
    cos_sim = model.compute_similarity(qry_output, tgt_output).squeeze()
    for string_, sim_ in zip(strings, cos_sim):
        print(string_, '=', sim_)
```
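When ranking more than a couple of candidates, the same representations can be compared in one shot. A minimal sketch reusing `qry_output`, `tgt_output`, and `strings` from the example above; the argmax picks the best-scoring caption:

```python
# Rank candidate captions by similarity to the image query and
# report the best match; reuses tensors from the example above.
scores = model.compute_similarity(qry_output, tgt_output).squeeze()
best = scores.argmax().item()
print(f"Best match: {strings[best]} (score={scores[best].item():.3f})")
```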