LLM2CLIP-Openai-L-14-224

Name: LLM2CLIP-Openai-L-14-224
Author: microsoft

500

license:apache-2.0

microsoft

Language Model

OTHER

New

500 downloads

Early-stage

Try on Hugging Face Add to Compare

Edge AI:

Mobile

Laptop

Server

Unknown

Mobile

Laptop

Server

Quick Summary

LLM2CLIP: Extending the Capability Boundaries of CLIP through Large Language Models Weiquan Huang 1 , Aoqi Wu 1 , Yifan Yang 2† , Xufang Luo 2 , Yuqing Yang 2 ,...

Code Examples

Usage (Python, transformers)

from PIL import Image
from transformers import AutoModel
from transformers import CLIPImageProcessor
import torch
import os

# Expose only GPU 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

image_path = "CLIP.png"
model_name_or_path = "microsoft/LLM2CLIP-Openai-L-14-224" # or /path/to/local/LLM2CLIP-Openai-L-14

# Image preprocessing is loaded from the base OpenAI CLIP ViT-L/14 checkpoint.
processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-large-patch14")
# NOTE: trust_remote_code=True runs model code shipped in the checkpoint repo.
model = AutoModel.from_pretrained(
    model_name_or_path,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
).to("cuda").eval()

# Context manager closes the image file once the pixel tensor is built.
with Image.open(image_path) as image:
    input_pixels = processor(images=image, return_tensors="pt").pixel_values.to("cuda")

# torch.autocast replaces the deprecated torch.cuda.amp.autocast().
with torch.no_grad(), torch.autocast(device_type="cuda"):
    outputs = model.get_image_features(input_pixels)

Usage (Python, transformers)

from PIL import Image
from transformers import AutoModel
from transformers import CLIPImageProcessor
import torch
import os

# Expose only GPU 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

image_path = "CLIP.png"
model_name_or_path = "microsoft/LLM2CLIP-Openai-L-14-224" # or /path/to/local/LLM2CLIP-Openai-L-14

# Image preprocessing is loaded from the base OpenAI CLIP ViT-L/14 checkpoint.
processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-large-patch14")
# NOTE: trust_remote_code=True runs model code shipped in the checkpoint repo.
model = AutoModel.from_pretrained(
    model_name_or_path,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
).to("cuda").eval()

# Context manager closes the image file once the pixel tensor is built.
with Image.open(image_path) as image:
    input_pixels = processor(images=image, return_tensors="pt").pixel_values.to("cuda")

# torch.autocast replaces the deprecated torch.cuda.amp.autocast().
with torch.no_grad(), torch.autocast(device_type="cuda"):
    outputs = model.get_image_features(input_pixels)

Usage (Python, transformers)

from PIL import Image
from transformers import AutoModel
from transformers import CLIPImageProcessor
import torch
import os

# Expose only GPU 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

image_path = "CLIP.png"
model_name_or_path = "microsoft/LLM2CLIP-Openai-L-14-224" # or /path/to/local/LLM2CLIP-Openai-L-14

# Image preprocessing is loaded from the base OpenAI CLIP ViT-L/14 checkpoint.
processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-large-patch14")
# NOTE: trust_remote_code=True runs model code shipped in the checkpoint repo.
model = AutoModel.from_pretrained(
    model_name_or_path,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
).to("cuda").eval()

# Context manager closes the image file once the pixel tensor is built.
with Image.open(image_path) as image:
    input_pixels = processor(images=image, return_tensors="pt").pixel_values.to("cuda")

# torch.autocast replaces the deprecated torch.cuda.amp.autocast().
with torch.no_grad(), torch.autocast(device_type="cuda"):
    outputs = model.get_image_features(input_pixels)

Usage (Python, transformers)

from PIL import Image
from transformers import AutoModel
from transformers import CLIPImageProcessor
import torch
import os

# Expose only GPU 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

image_path = "CLIP.png"
model_name_or_path = "microsoft/LLM2CLIP-Openai-L-14-224" # or /path/to/local/LLM2CLIP-Openai-L-14

# Image preprocessing is loaded from the base OpenAI CLIP ViT-L/14 checkpoint.
processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-large-patch14")
# NOTE: trust_remote_code=True runs model code shipped in the checkpoint repo.
model = AutoModel.from_pretrained(
    model_name_or_path,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
).to("cuda").eval()

# Context manager closes the image file once the pixel tensor is built.
with Image.open(image_path) as image:
    input_pixels = processor(images=image, return_tensors="pt").pixel_values.to("cuda")

# torch.autocast replaces the deprecated torch.cuda.amp.autocast().
with torch.no_grad(), torch.autocast(device_type="cuda"):
    outputs = model.get_image_features(input_pixels)

Usage (Python, transformers)

from PIL import Image
from transformers import AutoModel
from transformers import CLIPImageProcessor
import torch
import os

# Expose only GPU 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

image_path = "CLIP.png"
model_name_or_path = "microsoft/LLM2CLIP-Openai-L-14-224" # or /path/to/local/LLM2CLIP-Openai-L-14

# Image preprocessing is loaded from the base OpenAI CLIP ViT-L/14 checkpoint.
processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-large-patch14")
# NOTE: trust_remote_code=True runs model code shipped in the checkpoint repo.
model = AutoModel.from_pretrained(
    model_name_or_path,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
).to("cuda").eval()

# Context manager closes the image file once the pixel tensor is built.
with Image.open(image_path) as image:
    input_pixels = processor(images=image, return_tensors="pt").pixel_values.to("cuda")

# torch.autocast replaces the deprecated torch.cuda.amp.autocast().
with torch.no_grad(), torch.autocast(device_type="cuda"):
    outputs = model.get_image_features(input_pixels)

Usage (Python, transformers)

from PIL import Image
from transformers import AutoModel
from transformers import CLIPImageProcessor
import torch
import os

# Expose only GPU 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

image_path = "CLIP.png"
model_name_or_path = "microsoft/LLM2CLIP-Openai-L-14-224" # or /path/to/local/LLM2CLIP-Openai-L-14

# Image preprocessing is loaded from the base OpenAI CLIP ViT-L/14 checkpoint.
processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-large-patch14")
# NOTE: trust_remote_code=True runs model code shipped in the checkpoint repo.
model = AutoModel.from_pretrained(
    model_name_or_path,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
).to("cuda").eval()

# Context manager closes the image file once the pixel tensor is built.
with Image.open(image_path) as image:
    input_pixels = processor(images=image, return_tensors="pt").pixel_values.to("cuda")

# torch.autocast replaces the deprecated torch.cuda.amp.autocast().
with torch.no_grad(), torch.autocast(device_type="cuda"):
    outputs = model.get_image_features(input_pixels)

Usage (Python, transformers)

from PIL import Image
from transformers import AutoModel
from transformers import CLIPImageProcessor
import torch
import os

# Expose only GPU 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

image_path = "CLIP.png"
model_name_or_path = "microsoft/LLM2CLIP-Openai-L-14-224" # or /path/to/local/LLM2CLIP-Openai-L-14

# Image preprocessing is loaded from the base OpenAI CLIP ViT-L/14 checkpoint.
processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-large-patch14")
# NOTE: trust_remote_code=True runs model code shipped in the checkpoint repo.
model = AutoModel.from_pretrained(
    model_name_or_path,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
).to("cuda").eval()

# Context manager closes the image file once the pixel tensor is built.
with Image.open(image_path) as image:
    input_pixels = processor(images=image, return_tensors="pt").pixel_values.to("cuda")

# torch.autocast replaces the deprecated torch.cuda.amp.autocast().
with torch.no_grad(), torch.autocast(device_type="cuda"):
    outputs = model.get_image_features(input_pixels)

Usage (Python, transformers)

from PIL import Image
from transformers import AutoModel
from transformers import CLIPImageProcessor
import torch
import os

# Expose only GPU 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

image_path = "CLIP.png"
model_name_or_path = "microsoft/LLM2CLIP-Openai-L-14-224" # or /path/to/local/LLM2CLIP-Openai-L-14

# Image preprocessing is loaded from the base OpenAI CLIP ViT-L/14 checkpoint.
processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-large-patch14")
# NOTE: trust_remote_code=True runs model code shipped in the checkpoint repo.
model = AutoModel.from_pretrained(
    model_name_or_path,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
).to("cuda").eval()

# Context manager closes the image file once the pixel tensor is built.
with Image.open(image_path) as image:
    input_pixels = processor(images=image, return_tensors="pt").pixel_values.to("cuda")

# torch.autocast replaces the deprecated torch.cuda.amp.autocast().
with torch.no_grad(), torch.autocast(device_type="cuda"):
    outputs = model.get_image_features(input_pixels)

Usage (Python, transformers)

from PIL import Image
from transformers import AutoModel
from transformers import CLIPImageProcessor
import torch
import os

# Expose only GPU 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

image_path = "CLIP.png"
model_name_or_path = "microsoft/LLM2CLIP-Openai-L-14-224" # or /path/to/local/LLM2CLIP-Openai-L-14

# Image preprocessing is loaded from the base OpenAI CLIP ViT-L/14 checkpoint.
processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-large-patch14")
# NOTE: trust_remote_code=True runs model code shipped in the checkpoint repo.
model = AutoModel.from_pretrained(
    model_name_or_path,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
).to("cuda").eval()

# Context manager closes the image file once the pixel tensor is built.
with Image.open(image_path) as image:
    input_pixels = processor(images=image, return_tensors="pt").pixel_values.to("cuda")

# torch.autocast replaces the deprecated torch.cuda.amp.autocast().
with torch.no_grad(), torch.autocast(device_type="cuda"):
    outputs = model.get_image_features(input_pixels)

Usage (Python, transformers)

from PIL import Image
from transformers import AutoModel
from transformers import CLIPImageProcessor
import torch
import os

# Expose only GPU 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

image_path = "CLIP.png"
model_name_or_path = "microsoft/LLM2CLIP-Openai-L-14-224" # or /path/to/local/LLM2CLIP-Openai-L-14

# Image preprocessing is loaded from the base OpenAI CLIP ViT-L/14 checkpoint.
processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-large-patch14")
# NOTE: trust_remote_code=True runs model code shipped in the checkpoint repo.
model = AutoModel.from_pretrained(
    model_name_or_path,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
).to("cuda").eval()

# Context manager closes the image file once the pixel tensor is built.
with Image.open(image_path) as image:
    input_pixels = processor(images=image, return_tensors="pt").pixel_values.to("cuda")

# torch.autocast replaces the deprecated torch.cuda.amp.autocast().
with torch.no_grad(), torch.autocast(device_type="cuda"):
    outputs = model.get_image_features(input_pixels)

Usage (Python, transformers)

from PIL import Image
from transformers import AutoModel
from transformers import CLIPImageProcessor
import torch
import os

# Expose only GPU 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

image_path = "CLIP.png"
model_name_or_path = "microsoft/LLM2CLIP-Openai-L-14-224" # or /path/to/local/LLM2CLIP-Openai-L-14

# Image preprocessing is loaded from the base OpenAI CLIP ViT-L/14 checkpoint.
processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-large-patch14")
# NOTE: trust_remote_code=True runs model code shipped in the checkpoint repo.
model = AutoModel.from_pretrained(
    model_name_or_path,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
).to("cuda").eval()

# Context manager closes the image file once the pixel tensor is built.
with Image.open(image_path) as image:
    input_pixels = processor(images=image, return_tensors="pt").pixel_values.to("cuda")

# torch.autocast replaces the deprecated torch.cuda.amp.autocast().
with torch.no_grad(), torch.autocast(device_type="cuda"):
    outputs = model.get_image_features(input_pixels)

Usage (Python, transformers)

from PIL import Image
from transformers import AutoModel
from transformers import CLIPImageProcessor
import torch
import os

# Expose only GPU 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

image_path = "CLIP.png"
model_name_or_path = "microsoft/LLM2CLIP-Openai-L-14-224" # or /path/to/local/LLM2CLIP-Openai-L-14

# Image preprocessing is loaded from the base OpenAI CLIP ViT-L/14 checkpoint.
processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-large-patch14")
# NOTE: trust_remote_code=True runs model code shipped in the checkpoint repo.
model = AutoModel.from_pretrained(
    model_name_or_path,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
).to("cuda").eval()

# Context manager closes the image file once the pixel tensor is built.
with Image.open(image_path) as image:
    input_pixels = processor(images=image, return_tensors="pt").pixel_values.to("cuda")

# torch.autocast replaces the deprecated torch.cuda.amp.autocast().
with torch.no_grad(), torch.autocast(device_type="cuda"):
    outputs = model.get_image_features(input_pixels)

Usage (Python, transformers)

from PIL import Image
from transformers import AutoModel
from transformers import CLIPImageProcessor
import torch
import os

# Expose only GPU 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

image_path = "CLIP.png"
model_name_or_path = "microsoft/LLM2CLIP-Openai-L-14-224" # or /path/to/local/LLM2CLIP-Openai-L-14

# Image preprocessing is loaded from the base OpenAI CLIP ViT-L/14 checkpoint.
processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-large-patch14")
# NOTE: trust_remote_code=True runs model code shipped in the checkpoint repo.
model = AutoModel.from_pretrained(
    model_name_or_path,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
).to("cuda").eval()

# Context manager closes the image file once the pixel tensor is built.
with Image.open(image_path) as image:
    input_pixels = processor(images=image, return_tensors="pt").pixel_values.to("cuda")

# torch.autocast replaces the deprecated torch.cuda.amp.autocast().
with torch.no_grad(), torch.autocast(device_type="cuda"):
    outputs = model.get_image_features(input_pixels)

Usage (Python, transformers)

from PIL import Image
from transformers import AutoModel
from transformers import CLIPImageProcessor
import torch
import os

# Expose only GPU 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

image_path = "CLIP.png"
model_name_or_path = "microsoft/LLM2CLIP-Openai-L-14-224" # or /path/to/local/LLM2CLIP-Openai-L-14

# Image preprocessing is loaded from the base OpenAI CLIP ViT-L/14 checkpoint.
processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-large-patch14")
# NOTE: trust_remote_code=True runs model code shipped in the checkpoint repo.
model = AutoModel.from_pretrained(
    model_name_or_path,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
).to("cuda").eval()

# Context manager closes the image file once the pixel tensor is built.
with Image.open(image_path) as image:
    input_pixels = processor(images=image, return_tensors="pt").pixel_values.to("cuda")

# torch.autocast replaces the deprecated torch.cuda.amp.autocast().
with torch.no_grad(), torch.autocast(device_type="cuda"):
    outputs = model.get_image_features(input_pixels)

Usage (Python, transformers)

from PIL import Image
from transformers import AutoModel
from transformers import CLIPImageProcessor
import torch
import os

# Expose only GPU 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

image_path = "CLIP.png"
model_name_or_path = "microsoft/LLM2CLIP-Openai-L-14-224" # or /path/to/local/LLM2CLIP-Openai-L-14

# Image preprocessing is loaded from the base OpenAI CLIP ViT-L/14 checkpoint.
processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-large-patch14")
# NOTE: trust_remote_code=True runs model code shipped in the checkpoint repo.
model = AutoModel.from_pretrained(
    model_name_or_path,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
).to("cuda").eval()

# Context manager closes the image file once the pixel tensor is built.
with Image.open(image_path) as image:
    input_pixels = processor(images=image, return_tensors="pt").pixel_values.to("cuda")

# torch.autocast replaces the deprecated torch.cuda.amp.autocast().
with torch.no_grad(), torch.autocast(device_type="cuda"):
    outputs = model.get_image_features(input_pixels)

Usage (Python, transformers)

from PIL import Image
from transformers import AutoModel
from transformers import CLIPImageProcessor
import torch
import os

# Expose only GPU 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

image_path = "CLIP.png"
model_name_or_path = "microsoft/LLM2CLIP-Openai-L-14-224" # or /path/to/local/LLM2CLIP-Openai-L-14

# Image preprocessing is loaded from the base OpenAI CLIP ViT-L/14 checkpoint.
processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-large-patch14")
# NOTE: trust_remote_code=True runs model code shipped in the checkpoint repo.
model = AutoModel.from_pretrained(
    model_name_or_path,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
).to("cuda").eval()

# Context manager closes the image file once the pixel tensor is built.
with Image.open(image_path) as image:
    input_pixels = processor(images=image, return_tensors="pt").pixel_values.to("cuda")

# torch.autocast replaces the deprecated torch.cuda.amp.autocast().
with torch.no_grad(), torch.autocast(device_type="cuda"):
    outputs = model.get_image_features(input_pixels)

Usage (Python, transformers)

from PIL import Image
from transformers import AutoModel
from transformers import CLIPImageProcessor
import torch
import os

# Expose only GPU 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

image_path = "CLIP.png"
model_name_or_path = "microsoft/LLM2CLIP-Openai-L-14-224" # or /path/to/local/LLM2CLIP-Openai-L-14

# Image preprocessing is loaded from the base OpenAI CLIP ViT-L/14 checkpoint.
processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-large-patch14")
# NOTE: trust_remote_code=True runs model code shipped in the checkpoint repo.
model = AutoModel.from_pretrained(
    model_name_or_path,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
).to("cuda").eval()

# Context manager closes the image file once the pixel tensor is built.
with Image.open(image_path) as image:
    input_pixels = processor(images=image, return_tensors="pt").pixel_values.to("cuda")

# torch.autocast replaces the deprecated torch.cuda.amp.autocast().
with torch.no_grad(), torch.autocast(device_type="cuda"):
    outputs = model.get_image_features(input_pixels)

Deploy This Model

Production-ready deployment in minutes

Together.ai

Instant API access to this model

Fastest API

Production-ready inference API. Start free, scale to millions.

Try Free API

Replicate

One-click model deployment

Easiest Setup

Run models in the cloud with simple API. No DevOps required.

Deploy Now

Disclosure: We may earn a commission from these partners. This helps keep LLMYourWay free.