aimv2-large-patch14-224-lit
671
6
—
by
apple
Image Model
OTHER
New
671 downloads
Early-stage
Edge AI:
Mobile
Laptop
Server
Unknown
Mobile
Laptop
Server
Quick Summary
We introduce the AIMv2 family of vision models pre-trained with a multimodal autoregressive objective.
Code Examples
Usage (Python, transformers)
import requests
from PIL import Image
from transformers import AutoProcessor, AutoModel
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)
text = ["Picture of a dog.", "Picture of a cat.", "Picture of a horse."]
processor = AutoProcessor.from_pretrained(
"apple/aimv2-large-patch14-224-lit",
revision="c2cd59a786c4c06f39d199c50d08cc2eab9f8605",
)
model = AutoModel.from_pretrained(
"apple/aimv2-large-patch14-224-lit",
revision="c2cd59a786c4c06f39d199c50d08cc2eab9f8605",
trust_remote_code=True,
)
inputs = processor(
images=image,
text=text,
add_special_tokens=True,
truncation=True,
padding=True,
return_tensors="pt",
)
outputs = model(**inputs)
probs = outputs.logits_per_image.softmax(dim=-1)
Usage (Python, transformers)
import requests
from PIL import Image
from transformers import AutoProcessor, AutoModel
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)
text = ["Picture of a dog.", "Picture of a cat.", "Picture of a horse."]
processor = AutoProcessor.from_pretrained(
"apple/aimv2-large-patch14-224-lit",
revision="c2cd59a786c4c06f39d199c50d08cc2eab9f8605",
)
model = AutoModel.from_pretrained(
"apple/aimv2-large-patch14-224-lit",
revision="c2cd59a786c4c06f39d199c50d08cc2eab9f8605",
trust_remote_code=True,
)
inputs = processor(
images=image,
text=text,
add_special_tokens=True,
truncation=True,
padding=True,
return_tensors="pt",
)
outputs = model(**inputs)
probs = outputs.logits_per_image.softmax(dim=-1)
Usage (Python, transformers)
import requests
from PIL import Image
from transformers import AutoProcessor, AutoModel
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)
text = ["Picture of a dog.", "Picture of a cat.", "Picture of a horse."]
processor = AutoProcessor.from_pretrained(
"apple/aimv2-large-patch14-224-lit",
revision="c2cd59a786c4c06f39d199c50d08cc2eab9f8605",
)
model = AutoModel.from_pretrained(
"apple/aimv2-large-patch14-224-lit",
revision="c2cd59a786c4c06f39d199c50d08cc2eab9f8605",
trust_remote_code=True,
)
inputs = processor(
images=image,
text=text,
add_special_tokens=True,
truncation=True,
padding=True,
return_tensors="pt",
)
outputs = model(**inputs)
probs = outputs.logits_per_image.softmax(dim=-1)
Usage (Python, transformers)
import requests
from PIL import Image
from transformers import AutoProcessor, AutoModel
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)
text = ["Picture of a dog.", "Picture of a cat.", "Picture of a horse."]
processor = AutoProcessor.from_pretrained(
"apple/aimv2-large-patch14-224-lit",
revision="c2cd59a786c4c06f39d199c50d08cc2eab9f8605",
)
model = AutoModel.from_pretrained(
"apple/aimv2-large-patch14-224-lit",
revision="c2cd59a786c4c06f39d199c50d08cc2eab9f8605",
trust_remote_code=True,
)
inputs = processor(
images=image,
text=text,
add_special_tokens=True,
truncation=True,
padding=True,
return_tensors="pt",
)
outputs = model(**inputs)
probs = outputs.logits_per_image.softmax(dim=-1)
Usage (Python, transformers)
import requests
from PIL import Image
from transformers import AutoProcessor, AutoModel
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)
text = ["Picture of a dog.", "Picture of a cat.", "Picture of a horse."]
processor = AutoProcessor.from_pretrained(
"apple/aimv2-large-patch14-224-lit",
revision="c2cd59a786c4c06f39d199c50d08cc2eab9f8605",
)
model = AutoModel.from_pretrained(
"apple/aimv2-large-patch14-224-lit",
revision="c2cd59a786c4c06f39d199c50d08cc2eab9f8605",
trust_remote_code=True,
)
inputs = processor(
images=image,
text=text,
add_special_tokens=True,
truncation=True,
padding=True,
return_tensors="pt",
)
outputs = model(**inputs)
probs = outputs.logits_per_image.softmax(dim=-1)
Usage (Python, transformers)
import requests
from PIL import Image
from transformers import AutoProcessor, AutoModel
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)
text = ["Picture of a dog.", "Picture of a cat.", "Picture of a horse."]
processor = AutoProcessor.from_pretrained(
"apple/aimv2-large-patch14-224-lit",
revision="c2cd59a786c4c06f39d199c50d08cc2eab9f8605",
)
model = AutoModel.from_pretrained(
"apple/aimv2-large-patch14-224-lit",
revision="c2cd59a786c4c06f39d199c50d08cc2eab9f8605",
trust_remote_code=True,
)
inputs = processor(
images=image,
text=text,
add_special_tokens=True,
truncation=True,
padding=True,
return_tensors="pt",
)
outputs = model(**inputs)
probs = outputs.logits_per_image.softmax(dim=-1)
Usage (Python, transformers)
import requests
from PIL import Image
from transformers import AutoProcessor, AutoModel
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)
text = ["Picture of a dog.", "Picture of a cat.", "Picture of a horse."]
processor = AutoProcessor.from_pretrained(
"apple/aimv2-large-patch14-224-lit",
revision="c2cd59a786c4c06f39d199c50d08cc2eab9f8605",
)
model = AutoModel.from_pretrained(
"apple/aimv2-large-patch14-224-lit",
revision="c2cd59a786c4c06f39d199c50d08cc2eab9f8605",
trust_remote_code=True,
)
inputs = processor(
images=image,
text=text,
add_special_tokens=True,
truncation=True,
padding=True,
return_tensors="pt",
)
outputs = model(**inputs)
probs = outputs.logits_per_image.softmax(dim=-1)
Usage (Python, transformers)
import requests
from PIL import Image
from transformers import AutoProcessor, AutoModel
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)
text = ["Picture of a dog.", "Picture of a cat.", "Picture of a horse."]
processor = AutoProcessor.from_pretrained(
"apple/aimv2-large-patch14-224-lit",
revision="c2cd59a786c4c06f39d199c50d08cc2eab9f8605",
)
model = AutoModel.from_pretrained(
"apple/aimv2-large-patch14-224-lit",
revision="c2cd59a786c4c06f39d199c50d08cc2eab9f8605",
trust_remote_code=True,
)
inputs = processor(
images=image,
text=text,
add_special_tokens=True,
truncation=True,
padding=True,
return_tensors="pt",
)
outputs = model(**inputs)
probs = outputs.logits_per_image.softmax(dim=-1)
Usage (Python, transformers)
import requests
from PIL import Image
from transformers import AutoProcessor, AutoModel
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)
text = ["Picture of a dog.", "Picture of a cat.", "Picture of a horse."]
processor = AutoProcessor.from_pretrained(
"apple/aimv2-large-patch14-224-lit",
revision="c2cd59a786c4c06f39d199c50d08cc2eab9f8605",
)
model = AutoModel.from_pretrained(
"apple/aimv2-large-patch14-224-lit",
revision="c2cd59a786c4c06f39d199c50d08cc2eab9f8605",
trust_remote_code=True,
)
inputs = processor(
images=image,
text=text,
add_special_tokens=True,
truncation=True,
padding=True,
return_tensors="pt",
)
outputs = model(**inputs)
probs = outputs.logits_per_image.softmax(dim=-1)
Deploy This Model
Production-ready deployment in minutes
Together.ai
Instant API access to this model
Production-ready inference API. Start free, scale to millions.
Try Free API
Replicate
One-click model deployment
Run models in the cloud with simple API. No DevOps required.
Deploy Now
Disclosure: We may earn a commission from these partners. This helps keep LLMYourWay free.