flava-full

by facebook | license: bsd-3-clause | 2.7M downloads | context length: 512

Code Examples

FlavaModel (Python, transformers)
from PIL import Image
import requests

from transformers import FlavaProcessor, FlavaModel

model = FlavaModel.from_pretrained("facebook/flava-full")
processor = FlavaProcessor.from_pretrained("facebook/flava-full")

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

inputs = processor(
    text=["a photo of a cat", "a photo of a dog"],
    images=[image, image],
    return_tensors="pt",
    padding="max_length",
    max_length=77,
)

outputs = model(**inputs)
image_embeddings = outputs.image_embeddings # Batch size X (Number of image patches + 1) x Hidden size => 2 X 197 X 768
text_embeddings = outputs.text_embeddings # Batch size X Text sequence length X Hidden size => 2 X 77 X 768
multimodal_embeddings = outputs.multimodal_embeddings # Batch size X (Image sequence length + Text sequence length + 1) X Hidden size => 2 X 275 X 768
# Multimodal embeddings can be used for multimodal tasks such as VQA


## Pass only image
from transformers import FlavaFeatureExtractor

feature_extractor = FlavaFeatureExtractor.from_pretrained("facebook/flava-full")
inputs = feature_extractor(images=[image, image], return_tensors="pt")
outputs = model(**inputs)
image_embeddings = outputs.image_embeddings

## Pass only text
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained("facebook/flava-full")
inputs = tokenizer(["a photo of a cat", "a photo of a dog"], return_tensors="pt", padding="max_length", max_length=77)
outputs = model(**inputs)
text_embeddings = outputs.text_embeddings
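
The comment above notes that the multimodal embeddings can drive tasks such as VQA. A minimal sketch of that idea, continuing from the snippet above and assuming the fused [CLS] vector sits at index 0 of multimodal_embeddings; the two-way answer head and its label set are hypothetical and untrained, purely for illustration:

import torch
from torch import nn

# Hypothetical, untrained 2-way answer head ("cat" vs "dog"); FLAVA ships no such head,
# so in practice this layer would be trained on task-specific data.
answer_head = nn.Linear(768, 2)

# Fused image+text representation: the multimodal [CLS] vector at index 0.
fused_cls = multimodal_embeddings[:, 0, :]   # 2 x 768

logits = answer_head(fused_cls)              # 2 x 2, random weights here
probs = torch.softmax(logits, dim=-1)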
Encode Image (Python, transformers)
from PIL import Image
import requests

from transformers import FlavaFeatureExtractor, FlavaModel

model = FlavaModel.from_pretrained("facebook/flava-full")
feature_extractor = FlavaFeatureExtractor.from_pretrained("facebook/flava-full")

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

inputs = feature_extractor(images=[image], return_tensors="pt")

image_embedding = model.get_image_features(**inputs)
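
A hedged sketch of embedding several local images at once, e.g. to build a small retrieval index; the file paths are placeholders, and the index-0 slice assumes get_image_features returns per-token features with the [CLS] vector first (if it already returns one pooled vector per image, that step can be dropped):

import torch

# Placeholder paths; substitute your own files.
paths = ["img_0.jpg", "img_1.jpg", "img_2.jpg"]
images = [Image.open(p).convert("RGB") for p in paths]

inputs = feature_extractor(images=images, return_tensors="pt")

with torch.no_grad():
    feats = model.get_image_features(**inputs)

# If features are per-token (batch x tokens x dim), keep only the [CLS] vector.
if feats.ndim == 3:
    feats = feats[:, 0, :]

# L2-normalise so dot products become cosine similarities.
image_index = torch.nn.functional.normalize(feats, dim=-1)   # num_images x dim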
Encode Text (Python, transformers)
from transformers import BertTokenizer, FlavaModel

model = FlavaModel.from_pretrained("facebook/flava-full")
tokenizer = BertTokenizer.from_pretrained("facebook/flava-full")

inputs = tokenizer(text=["a photo of a dog"], return_tensors="pt", padding="max_length", max_length=77)

text_embedding = model.get_text_features(**inputs)
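
Combining the two encoders gives a simple image-to-text matching sketch. This assumes the get_*_features outputs live in FLAVA's shared projection space with the [CLS] vector at index 0, and it uses plain cosine similarity rather than the model's trained contrastive temperature, so treat the scores as illustrative:

import requests
import torch
from PIL import Image
from transformers import FlavaFeatureExtractor

feature_extractor = FlavaFeatureExtractor.from_pretrained("facebook/flava-full")

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)
texts = ["a photo of a cat", "a photo of a dog"]

img_inputs = feature_extractor(images=[image], return_tensors="pt")
txt_inputs = tokenizer(text=texts, return_tensors="pt", padding="max_length", max_length=77)

with torch.no_grad():
    img_feats = model.get_image_features(**img_inputs)
    txt_feats = model.get_text_features(**txt_inputs)

# Keep the [CLS] vectors if the features are per-token.
if img_feats.ndim == 3:
    img_feats = img_feats[:, 0, :]
if txt_feats.ndim == 3:
    txt_feats = txt_feats[:, 0, :]

img_feats = torch.nn.functional.normalize(img_feats, dim=-1)
txt_feats = torch.nn.functional.normalize(txt_feats, dim=-1)

similarity = img_feats @ txt_feats.T   # 1 x 2 cosine similarities, higher = better match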
FlavaImageModel (Python, transformers)
from PIL import Image
import requests

from transformers import FlavaFeatureExtractor, FlavaImageModel

model = FlavaImageModel.from_pretrained("facebook/flava-full")
feature_extractor = FlavaFeatureExtractor.from_pretrained("facebook/flava-full")

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

inputs = feature_extractor(images=[image], return_tensors="pt")

outputs = model(**inputs)
image_embeddings = outputs.last_hidden_state
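
The hidden states above follow the ViT-style layout already noted in the first example (197 tokens = 1 [CLS] + 196 patches), so they can be split into a global vector and per-patch features; the 14 x 14 grid below assumes the default 224 x 224 input with 16 x 16 patches:

# Continuing from the snippet above.
cls_embedding = image_embeddings[:, 0, :]       # 1 x 768 global image vector
patch_embeddings = image_embeddings[:, 1:, :]   # 1 x 196 x 768 per-patch features

# Lay the patch features back out on the spatial grid for dense downstream use.
patch_grid = patch_embeddings.reshape(1, 14, 14, 768)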
FlavaTextModel (Python, transformers)
from transformers import BertTokenizer, FlavaTextModel

model = FlavaTextModel.from_pretrained("facebook/flava-full")
tokenizer = BertTokenizer.from_pretrained("facebook/flava-full")

inputs = tokenizer(text=["a photo of a dog"], return_tensors="pt", padding="max_length", max_length=77)

outputs = model(**inputs)
text_embeddings = outputs.last_hidden_state
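
To collapse the token-level states above into one sentence-level vector, a common recipe is masked mean pooling; a minimal sketch continuing from the snippet above, noting that this pooling choice is a generic convention rather than something prescribed by FLAVA:

# Average only over real tokens, ignoring the max_length padding.
mask = inputs["attention_mask"].unsqueeze(-1)    # 1 x 77 x 1
summed = (text_embeddings * mask).sum(dim=1)     # 1 x 768
counts = mask.sum(dim=1).clamp(min=1)            # 1 x 1
sentence_embedding = summed / counts             # 1 x 768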
