flava-full
2.7M
42
512
Small context
license:bsd-3-clause
by
facebook
Other
OTHER
High
2.7M downloads
Battle-tested
Edge AI:
Mobile
Laptop
Server
Unknown
Mobile
Laptop
Server
Quick Summary
--- license: bsd-3-clause ---
Code Examples
FlavaModel (python, transformers)
# FlavaModel example: run the model on paired image+text inputs, then on
# image-only inputs and on text-only inputs.
from PIL import Image
import requests
from transformers import (
    BertTokenizer,
    FlavaFeatureExtractor,
    FlavaModel,
    FlavaProcessor,
)

model = FlavaModel.from_pretrained("facebook/flava-full")
processor = FlavaProcessor.from_pretrained("facebook/flava-full")

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
# Bound the wait and fail on HTTP errors so a bad download surfaces here
# instead of as an image-decoding error further down.
response = requests.get(url, stream=True, timeout=30)
response.raise_for_status()
image = Image.open(response.raw)

inputs = processor(
    text=["a photo of a cat", "a photo of a dog"],
    images=[image, image],
    return_tensors="pt",
    padding="max_length",
    max_length=77,
)
outputs = model(**inputs)
image_embeddings = outputs.image_embeddings  # Batch size X (Number of image patches + 1) x Hidden size => 2 X 197 X 768
text_embeddings = outputs.text_embeddings  # Batch size X (Text sequence length + 1) X Hidden size => 2 X 77 X 768
multimodal_embeddings = outputs.multimodal_embeddings  # Batch size X (Number of image patches + Text Sequence Length + 3) X Hidden size => 2 X 275 x 768
# Multimodal embeddings can be used for multimodal tasks such as VQA

## Pass only image
feature_extractor = FlavaFeatureExtractor.from_pretrained("facebook/flava-full")
inputs = feature_extractor(images=[image, image], return_tensors="pt")
outputs = model(**inputs)
image_embeddings = outputs.image_embeddings

## Pass only text
tokenizer = BertTokenizer.from_pretrained("facebook/flava-full")
inputs = tokenizer(
    ["a photo of a cat", "a photo of a dog"],
    return_tensors="pt",
    padding="max_length",
    max_length=77,
)
outputs = model(**inputs)
text_embeddings = outputs.text_embeddings

Encode Image (python, transformers)
# Encode Image example: load FlavaModel and its feature extractor, download a
# sample image, and build the image-only model inputs.
from PIL import Image
import requests
from transformers import FlavaFeatureExtractor, FlavaModel

model = FlavaModel.from_pretrained("facebook/flava-full")
feature_extractor = FlavaFeatureExtractor.from_pretrained("facebook/flava-full")

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
# Bound the wait and fail on HTTP errors so a bad download surfaces here
# instead of as an image-decoding error further down.
response = requests.get(url, stream=True, timeout=30)
response.raise_for_status()
image = Image.open(response.raw)

inputs = feature_extractor(images=[image], return_tensors="pt")
image_embedding = model.get_image_features(**inputs)

Encode Text (python, transformers)
# Encode Text example: load FlavaModel and its BERT tokenizer, then tokenize a
# single caption padded to 77 tokens as PyTorch tensors.
# NOTE(review): Image is imported but not used by this text-only snippet.
from PIL import Image
from transformers import BertTokenizer
from transformers import FlavaModel

model = FlavaModel.from_pretrained("facebook/flava-full")
tokenizer = BertTokenizer.from_pretrained("facebook/flava-full")

inputs = tokenizer(
    text=["a photo of a dog"],
    return_tensors="pt",
    padding="max_length",
    max_length=77,
)
text_embedding = model.get_text_features(**inputs)

ITM logits (python, transformers)
# FlavaImageModel example: load the image-only model and its feature
# extractor, download a sample image, and run a forward pass.
from PIL import Image
import requests
from transformers import FlavaFeatureExtractor, FlavaImageModel

model = FlavaImageModel.from_pretrained("facebook/flava-full")
feature_extractor = FlavaFeatureExtractor.from_pretrained("facebook/flava-full")

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
# Bound the wait and fail on HTTP errors so a bad download surfaces here
# instead of as an image-decoding error further down.
response = requests.get(url, stream=True, timeout=30)
response.raise_for_status()
image = Image.open(response.raw)

inputs = feature_extractor(images=[image], return_tensors="pt")
outputs = model(**inputs)
image_embeddings = outputs.last_hidden_state

FlavaTextModel (python, transformers)
# FlavaTextModel example: load the text-only model and its BERT tokenizer,
# tokenize a single caption padded to 77 tokens, and run a forward pass.
# NOTE(review): Image is imported but not used by this text-only snippet.
from PIL import Image
from transformers import BertTokenizer
from transformers import FlavaTextModel

model = FlavaTextModel.from_pretrained("facebook/flava-full")
tokenizer = BertTokenizer.from_pretrained("facebook/flava-full")

inputs = tokenizer(
    text=["a photo of a dog"],
    return_tensors="pt",
    padding="max_length",
    max_length=77,
)
outputs = model(**inputs)
text_embeddings = outputs.last_hidden_state

Deploy This Model
Production-ready deployment in minutes
Together.ai
Instant API access to this model
Production-ready inference API. Start free, scale to millions.
Try Free API

Replicate
One-click model deployment
Run models in the cloud with simple API. No DevOps required.
Deploy Now

Disclosure: We may earn a commission from these partners. This helps keep LLMYourWay free.