EuroVLM-9B-Preview

505

4

9.0B

35 languages

license:apache-2.0

by

utter-project

Image Model

OTHER

9B params

New

505 downloads

Early-stage

Try on Hugging Face Add to Compare

Edge AI:

Mobile

Laptop

Server

21GB+ RAM

Mobile

Laptop

Server

Quick Summary

⚠️ PREVIEW RELEASE: This is a preview version of EuroVLM-9B.

Device Compatibility

Mobile

4-6GB RAM

Laptop

16GB RAM

Server

GPU

Minimum Recommended

9GB+ RAM

Code Examples

Run the model (Python, transformers)

from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
    
# Identifier passed to both from_pretrained calls below.
model_id = "utter-project/EuroVLM-9B-Preview"
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(model_id)

# Load an image
image = Image.open("/path/to/image.jpg")

# Conversation: a system instruction, then a user turn with an image slot and a question.
system_turn = {
    "role": "system",
    "content": "You are EuroVLM --- a multimodal AI assistant specialized in European languages that provides safe, educational and helpful answers about images and text.",
}
user_turn = {
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "What do you see in this image? Please describe it in Portuguese."},
    ],
}
messages = [system_turn, user_turn]

# Build the prompt from the messages, encode it with the image, generate, and print the first sequence.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=1024)
print(processor.decode(outputs[0], skip_special_tokens=True))

Run the model (Python, transformers)

from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
    
# Identifier passed to both from_pretrained calls below.
model_id = "utter-project/EuroVLM-9B-Preview"
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(model_id)

# Load an image
image = Image.open("/path/to/image.jpg")

# Conversation: a system instruction, then a user turn with an image slot and a question.
system_turn = {
    "role": "system",
    "content": "You are EuroVLM --- a multimodal AI assistant specialized in European languages that provides safe, educational and helpful answers about images and text.",
}
user_turn = {
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "What do you see in this image? Please describe it in Portuguese."},
    ],
}
messages = [system_turn, user_turn]

# Build the prompt from the messages, encode it with the image, generate, and print the first sequence.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=1024)
print(processor.decode(outputs[0], skip_special_tokens=True))

Run the model (Python, transformers)

from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
    
# Identifier passed to both from_pretrained calls below.
model_id = "utter-project/EuroVLM-9B-Preview"
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(model_id)

# Load an image
image = Image.open("/path/to/image.jpg")

# Conversation: a system instruction, then a user turn with an image slot and a question.
system_turn = {
    "role": "system",
    "content": "You are EuroVLM --- a multimodal AI assistant specialized in European languages that provides safe, educational and helpful answers about images and text.",
}
user_turn = {
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "What do you see in this image? Please describe it in Portuguese."},
    ],
}
messages = [system_turn, user_turn]

# Build the prompt from the messages, encode it with the image, generate, and print the first sequence.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=1024)
print(processor.decode(outputs[0], skip_special_tokens=True))

Run the model (Python, transformers)

from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
    
# Identifier passed to both from_pretrained calls below.
model_id = "utter-project/EuroVLM-9B-Preview"
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(model_id)

# Load an image
image = Image.open("/path/to/image.jpg")

# Conversation: a system instruction, then a user turn with an image slot and a question.
system_turn = {
    "role": "system",
    "content": "You are EuroVLM --- a multimodal AI assistant specialized in European languages that provides safe, educational and helpful answers about images and text.",
}
user_turn = {
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "What do you see in this image? Please describe it in Portuguese."},
    ],
}
messages = [system_turn, user_turn]

# Build the prompt from the messages, encode it with the image, generate, and print the first sequence.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=1024)
print(processor.decode(outputs[0], skip_special_tokens=True))

Run the model (Python, transformers)

from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
    
# Identifier passed to both from_pretrained calls below.
model_id = "utter-project/EuroVLM-9B-Preview"
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(model_id)

# Load an image
image = Image.open("/path/to/image.jpg")

# Conversation: a system instruction, then a user turn with an image slot and a question.
system_turn = {
    "role": "system",
    "content": "You are EuroVLM --- a multimodal AI assistant specialized in European languages that provides safe, educational and helpful answers about images and text.",
}
user_turn = {
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "What do you see in this image? Please describe it in Portuguese."},
    ],
}
messages = [system_turn, user_turn]

# Build the prompt from the messages, encode it with the image, generate, and print the first sequence.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=1024)
print(processor.decode(outputs[0], skip_special_tokens=True))

Run the model (Python, transformers)

from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
    
# Identifier passed to both from_pretrained calls below.
model_id = "utter-project/EuroVLM-9B-Preview"
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(model_id)

# Load an image
image = Image.open("/path/to/image.jpg")

# Conversation: a system instruction, then a user turn with an image slot and a question.
system_turn = {
    "role": "system",
    "content": "You are EuroVLM --- a multimodal AI assistant specialized in European languages that provides safe, educational and helpful answers about images and text.",
}
user_turn = {
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "What do you see in this image? Please describe it in Portuguese."},
    ],
}
messages = [system_turn, user_turn]

# Build the prompt from the messages, encode it with the image, generate, and print the first sequence.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=1024)
print(processor.decode(outputs[0], skip_special_tokens=True))

Run the model (Python, transformers)

from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
    
# Identifier passed to both from_pretrained calls below.
model_id = "utter-project/EuroVLM-9B-Preview"
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(model_id)

# Load an image
image = Image.open("/path/to/image.jpg")

# Conversation: a system instruction, then a user turn with an image slot and a question.
system_turn = {
    "role": "system",
    "content": "You are EuroVLM --- a multimodal AI assistant specialized in European languages that provides safe, educational and helpful answers about images and text.",
}
user_turn = {
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "What do you see in this image? Please describe it in Portuguese."},
    ],
}
messages = [system_turn, user_turn]

# Build the prompt from the messages, encode it with the image, generate, and print the first sequence.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=1024)
print(processor.decode(outputs[0], skip_special_tokens=True))

Run the model (Python, transformers)

from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
    
# Identifier passed to both from_pretrained calls below.
model_id = "utter-project/EuroVLM-9B-Preview"
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(model_id)

# Load an image
image = Image.open("/path/to/image.jpg")

# Conversation: a system instruction, then a user turn with an image slot and a question.
system_turn = {
    "role": "system",
    "content": "You are EuroVLM --- a multimodal AI assistant specialized in European languages that provides safe, educational and helpful answers about images and text.",
}
user_turn = {
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "What do you see in this image? Please describe it in Portuguese."},
    ],
}
messages = [system_turn, user_turn]

# Build the prompt from the messages, encode it with the image, generate, and print the first sequence.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=1024)
print(processor.decode(outputs[0], skip_special_tokens=True))

Run the model (Python, transformers)

from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
    
# Identifier passed to both from_pretrained calls below.
model_id = "utter-project/EuroVLM-9B-Preview"
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(model_id)

# Load an image
image = Image.open("/path/to/image.jpg")

# Conversation: a system instruction, then a user turn with an image slot and a question.
system_turn = {
    "role": "system",
    "content": "You are EuroVLM --- a multimodal AI assistant specialized in European languages that provides safe, educational and helpful answers about images and text.",
}
user_turn = {
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "What do you see in this image? Please describe it in Portuguese."},
    ],
}
messages = [system_turn, user_turn]

# Build the prompt from the messages, encode it with the image, generate, and print the first sequence.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=1024)
print(processor.decode(outputs[0], skip_special_tokens=True))

Run the model (Python, transformers)

from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
    
# Identifier passed to both from_pretrained calls below.
model_id = "utter-project/EuroVLM-9B-Preview"
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(model_id)

# Load an image
image = Image.open("/path/to/image.jpg")

# Conversation: a system instruction, then a user turn with an image slot and a question.
system_turn = {
    "role": "system",
    "content": "You are EuroVLM --- a multimodal AI assistant specialized in European languages that provides safe, educational and helpful answers about images and text.",
}
user_turn = {
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "What do you see in this image? Please describe it in Portuguese."},
    ],
}
messages = [system_turn, user_turn]

# Build the prompt from the messages, encode it with the image, generate, and print the first sequence.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=1024)
print(processor.decode(outputs[0], skip_special_tokens=True))

Run the model (Python, transformers)

from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
    
# Identifier passed to both from_pretrained calls below.
model_id = "utter-project/EuroVLM-9B-Preview"
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(model_id)

# Load an image
image = Image.open("/path/to/image.jpg")

# Conversation: a system instruction, then a user turn with an image slot and a question.
system_turn = {
    "role": "system",
    "content": "You are EuroVLM --- a multimodal AI assistant specialized in European languages that provides safe, educational and helpful answers about images and text.",
}
user_turn = {
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "What do you see in this image? Please describe it in Portuguese."},
    ],
}
messages = [system_turn, user_turn]

# Build the prompt from the messages, encode it with the image, generate, and print the first sequence.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=1024)
print(processor.decode(outputs[0], skip_special_tokens=True))

Run the model (Python, transformers)

from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
    
# Identifier passed to both from_pretrained calls below.
model_id = "utter-project/EuroVLM-9B-Preview"
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(model_id)

# Load an image
image = Image.open("/path/to/image.jpg")

# Conversation: a system instruction, then a user turn with an image slot and a question.
system_turn = {
    "role": "system",
    "content": "You are EuroVLM --- a multimodal AI assistant specialized in European languages that provides safe, educational and helpful answers about images and text.",
}
user_turn = {
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "What do you see in this image? Please describe it in Portuguese."},
    ],
}
messages = [system_turn, user_turn]

# Build the prompt from the messages, encode it with the image, generate, and print the first sequence.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=1024)
print(processor.decode(outputs[0], skip_special_tokens=True))

Run the model (Python, transformers)

from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
    
# Identifier passed to both from_pretrained calls below.
model_id = "utter-project/EuroVLM-9B-Preview"
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(model_id)

# Load an image
image = Image.open("/path/to/image.jpg")

# Conversation: a system instruction, then a user turn with an image slot and a question.
system_turn = {
    "role": "system",
    "content": "You are EuroVLM --- a multimodal AI assistant specialized in European languages that provides safe, educational and helpful answers about images and text.",
}
user_turn = {
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "What do you see in this image? Please describe it in Portuguese."},
    ],
}
messages = [system_turn, user_turn]

# Build the prompt from the messages, encode it with the image, generate, and print the first sequence.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=1024)
print(processor.decode(outputs[0], skip_special_tokens=True))

Run the model (Python, transformers)

from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
    
# Identifier passed to both from_pretrained calls below.
model_id = "utter-project/EuroVLM-9B-Preview"
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(model_id)

# Load an image
image = Image.open("/path/to/image.jpg")

# Conversation: a system instruction, then a user turn with an image slot and a question.
system_turn = {
    "role": "system",
    "content": "You are EuroVLM --- a multimodal AI assistant specialized in European languages that provides safe, educational and helpful answers about images and text.",
}
user_turn = {
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "What do you see in this image? Please describe it in Portuguese."},
    ],
}
messages = [system_turn, user_turn]

# Build the prompt from the messages, encode it with the image, generate, and print the first sequence.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=1024)
print(processor.decode(outputs[0], skip_special_tokens=True))

Run the model (Python, transformers)

from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
    
# Identifier passed to both from_pretrained calls below.
model_id = "utter-project/EuroVLM-9B-Preview"
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(model_id)

# Load an image
image = Image.open("/path/to/image.jpg")

# Conversation: a system instruction, then a user turn with an image slot and a question.
system_turn = {
    "role": "system",
    "content": "You are EuroVLM --- a multimodal AI assistant specialized in European languages that provides safe, educational and helpful answers about images and text.",
}
user_turn = {
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "What do you see in this image? Please describe it in Portuguese."},
    ],
}
messages = [system_turn, user_turn]

# Build the prompt from the messages, encode it with the image, generate, and print the first sequence.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=1024)
print(processor.decode(outputs[0], skip_special_tokens=True))

Run the model (Python, transformers)

from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
    
# Identifier passed to both from_pretrained calls below.
model_id = "utter-project/EuroVLM-9B-Preview"
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(model_id)

# Load an image
image = Image.open("/path/to/image.jpg")

# Conversation: a system instruction, then a user turn with an image slot and a question.
system_turn = {
    "role": "system",
    "content": "You are EuroVLM --- a multimodal AI assistant specialized in European languages that provides safe, educational and helpful answers about images and text.",
}
user_turn = {
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "What do you see in this image? Please describe it in Portuguese."},
    ],
}
messages = [system_turn, user_turn]

# Build the prompt from the messages, encode it with the image, generate, and print the first sequence.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=1024)
print(processor.decode(outputs[0], skip_special_tokens=True))

Run the model (Python, transformers)

from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
    
# Identifier passed to both from_pretrained calls below.
model_id = "utter-project/EuroVLM-9B-Preview"
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(model_id)

# Load an image
image = Image.open("/path/to/image.jpg")

# Conversation: a system instruction, then a user turn with an image slot and a question.
system_turn = {
    "role": "system",
    "content": "You are EuroVLM --- a multimodal AI assistant specialized in European languages that provides safe, educational and helpful answers about images and text.",
}
user_turn = {
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "What do you see in this image? Please describe it in Portuguese."},
    ],
}
messages = [system_turn, user_turn]

# Build the prompt from the messages, encode it with the image, generate, and print the first sequence.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=1024)
print(processor.decode(outputs[0], skip_special_tokens=True))

Run the model (Python, transformers)

from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
    
# Identifier passed to both from_pretrained calls below.
model_id = "utter-project/EuroVLM-9B-Preview"
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(model_id)

# Load an image
image = Image.open("/path/to/image.jpg")

# Conversation: a system instruction, then a user turn with an image slot and a question.
system_turn = {
    "role": "system",
    "content": "You are EuroVLM --- a multimodal AI assistant specialized in European languages that provides safe, educational and helpful answers about images and text.",
}
user_turn = {
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "What do you see in this image? Please describe it in Portuguese."},
    ],
}
messages = [system_turn, user_turn]

# Build the prompt from the messages, encode it with the image, generate, and print the first sequence.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=1024)
print(processor.decode(outputs[0], skip_special_tokens=True))

Run the model (Python, transformers)

from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
    
# Identifier passed to both from_pretrained calls below.
model_id = "utter-project/EuroVLM-9B-Preview"
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(model_id)

# Load an image
image = Image.open("/path/to/image.jpg")

# Conversation: a system instruction, then a user turn with an image slot and a question.
system_turn = {
    "role": "system",
    "content": "You are EuroVLM --- a multimodal AI assistant specialized in European languages that provides safe, educational and helpful answers about images and text.",
}
user_turn = {
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "What do you see in this image? Please describe it in Portuguese."},
    ],
}
messages = [system_turn, user_turn]

# Build the prompt from the messages, encode it with the image, generate, and print the first sequence.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=1024)
print(processor.decode(outputs[0], skip_special_tokens=True))

Run the model (Python, transformers)

from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
    
# Identifier passed to both from_pretrained calls below.
model_id = "utter-project/EuroVLM-9B-Preview"
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(model_id)

# Load an image
image = Image.open("/path/to/image.jpg")

# Conversation: a system instruction, then a user turn with an image slot and a question.
system_turn = {
    "role": "system",
    "content": "You are EuroVLM --- a multimodal AI assistant specialized in European languages that provides safe, educational and helpful answers about images and text.",
}
user_turn = {
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "What do you see in this image? Please describe it in Portuguese."},
    ],
}
messages = [system_turn, user_turn]

# Build the prompt from the messages, encode it with the image, generate, and print the first sequence.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=1024)
print(processor.decode(outputs[0], skip_special_tokens=True))

Run the model (Python, transformers)

from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
    
# Identifier passed to both from_pretrained calls below.
model_id = "utter-project/EuroVLM-9B-Preview"
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(model_id)

# Load an image
image = Image.open("/path/to/image.jpg")

# Conversation: a system instruction, then a user turn with an image slot and a question.
system_turn = {
    "role": "system",
    "content": "You are EuroVLM --- a multimodal AI assistant specialized in European languages that provides safe, educational and helpful answers about images and text.",
}
user_turn = {
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "What do you see in this image? Please describe it in Portuguese."},
    ],
}
messages = [system_turn, user_turn]

# Build the prompt from the messages, encode it with the image, generate, and print the first sequence.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=1024)
print(processor.decode(outputs[0], skip_special_tokens=True))

Run the model (Python, transformers)

from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
    
# Identifier passed to both from_pretrained calls below.
model_id = "utter-project/EuroVLM-9B-Preview"
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(model_id)

# Load an image
image = Image.open("/path/to/image.jpg")

# Conversation: a system instruction, then a user turn with an image slot and a question.
system_turn = {
    "role": "system",
    "content": "You are EuroVLM --- a multimodal AI assistant specialized in European languages that provides safe, educational and helpful answers about images and text.",
}
user_turn = {
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "What do you see in this image? Please describe it in Portuguese."},
    ],
}
messages = [system_turn, user_turn]

# Build the prompt from the messages, encode it with the image, generate, and print the first sequence.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=1024)
print(processor.decode(outputs[0], skip_special_tokens=True))

Run the model (Python, transformers)

from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
    
# Identifier passed to both from_pretrained calls below.
model_id = "utter-project/EuroVLM-9B-Preview"
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(model_id)

# Load an image
image = Image.open("/path/to/image.jpg")

# Conversation: a system instruction, then a user turn with an image slot and a question.
system_turn = {
    "role": "system",
    "content": "You are EuroVLM --- a multimodal AI assistant specialized in European languages that provides safe, educational and helpful answers about images and text.",
}
user_turn = {
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "What do you see in this image? Please describe it in Portuguese."},
    ],
}
messages = [system_turn, user_turn]

# Build the prompt from the messages, encode it with the image, generate, and print the first sequence.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=1024)
print(processor.decode(outputs[0], skip_special_tokens=True))

Run the model (Python, transformers)

from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
    
# Identifier passed to both from_pretrained calls below.
model_id = "utter-project/EuroVLM-9B-Preview"
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(model_id)

# Load an image
image = Image.open("/path/to/image.jpg")

# Conversation: a system instruction, then a user turn with an image slot and a question.
system_turn = {
    "role": "system",
    "content": "You are EuroVLM --- a multimodal AI assistant specialized in European languages that provides safe, educational and helpful answers about images and text.",
}
user_turn = {
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "What do you see in this image? Please describe it in Portuguese."},
    ],
}
messages = [system_turn, user_turn]

# Build the prompt from the messages, encode it with the image, generate, and print the first sequence.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=1024)
print(processor.decode(outputs[0], skip_special_tokens=True))

Run the model (Python, transformers)

from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
    
# Identifier passed to both from_pretrained calls below.
model_id = "utter-project/EuroVLM-9B-Preview"
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(model_id)

# Load an image
image = Image.open("/path/to/image.jpg")

# Conversation: a system instruction, then a user turn with an image slot and a question.
system_turn = {
    "role": "system",
    "content": "You are EuroVLM --- a multimodal AI assistant specialized in European languages that provides safe, educational and helpful answers about images and text.",
}
user_turn = {
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "What do you see in this image? Please describe it in Portuguese."},
    ],
}
messages = [system_turn, user_turn]

# Build the prompt from the messages, encode it with the image, generate, and print the first sequence.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=1024)
print(processor.decode(outputs[0], skip_special_tokens=True))

Run the model (Python, transformers)

from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
    
# Identifier passed to both from_pretrained calls below.
model_id = "utter-project/EuroVLM-9B-Preview"
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(model_id)

# Load an image
image = Image.open("/path/to/image.jpg")

# Conversation: a system instruction, then a user turn with an image slot and a question.
system_turn = {
    "role": "system",
    "content": "You are EuroVLM --- a multimodal AI assistant specialized in European languages that provides safe, educational and helpful answers about images and text.",
}
user_turn = {
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "What do you see in this image? Please describe it in Portuguese."},
    ],
}
messages = [system_turn, user_turn]

# Build the prompt from the messages, encode it with the image, generate, and print the first sequence.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=1024)
print(processor.decode(outputs[0], skip_special_tokens=True))

Run the model (Python, transformers)

from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
    
# Identifier passed to both from_pretrained calls below.
model_id = "utter-project/EuroVLM-9B-Preview"
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(model_id)

# Load an image
image = Image.open("/path/to/image.jpg")

# Conversation: a system instruction, then a user turn with an image slot and a question.
system_turn = {
    "role": "system",
    "content": "You are EuroVLM --- a multimodal AI assistant specialized in European languages that provides safe, educational and helpful answers about images and text.",
}
user_turn = {
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "What do you see in this image? Please describe it in Portuguese."},
    ],
}
messages = [system_turn, user_turn]

# Build the prompt from the messages, encode it with the image, generate, and print the first sequence.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=1024)
print(processor.decode(outputs[0], skip_special_tokens=True))

Run the model (Python, transformers)

from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
    
# Identifier passed to both from_pretrained calls below.
model_id = "utter-project/EuroVLM-9B-Preview"
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(model_id)

# Load an image
image = Image.open("/path/to/image.jpg")

# Conversation: a system instruction, then a user turn with an image slot and a question.
system_turn = {
    "role": "system",
    "content": "You are EuroVLM --- a multimodal AI assistant specialized in European languages that provides safe, educational and helpful answers about images and text.",
}
user_turn = {
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "What do you see in this image? Please describe it in Portuguese."},
    ],
}
messages = [system_turn, user_turn]

# Build the prompt from the messages, encode it with the image, generate, and print the first sequence.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=1024)
print(processor.decode(outputs[0], skip_special_tokens=True))

Run the model (Python, transformers)

from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
    
# Identifier passed to both from_pretrained calls below.
model_id = "utter-project/EuroVLM-9B-Preview"
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(model_id)

# Load an image
image = Image.open("/path/to/image.jpg")

# Conversation: a system instruction, then a user turn with an image slot and a question.
system_turn = {
    "role": "system",
    "content": "You are EuroVLM --- a multimodal AI assistant specialized in European languages that provides safe, educational and helpful answers about images and text.",
}
user_turn = {
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "What do you see in this image? Please describe it in Portuguese."},
    ],
}
messages = [system_turn, user_turn]

# Build the prompt from the messages, encode it with the image, generate, and print the first sequence.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=1024)
print(processor.decode(outputs[0], skip_special_tokens=True))

Run the model (Python, transformers)

from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
    
model_id = "utter-project/EuroVLM-9B-Preview"
# Fetch (or load from the local cache) the processor and the model weights.
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(model_id)

# Load an image. Image.open() is lazy, so open it in a `with` block and
# materialize an RGB copy before the underlying file is closed.
with Image.open("/path/to/image.jpg") as image_file:
    image = image_file.convert("RGB")

# One system turn plus one user turn that carries an image and a text request.
messages = [
    {
        "role": "system",
        "content": "You are EuroVLM --- a multimodal AI assistant specialized in European languages that provides safe, educational and helpful answers about images and text.",
    },
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": "What do you see in this image? Please describe it in Portuguese."},
        ],
    },
]

# Render the conversation to a prompt string that is ready for generation.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=1024)

# generate() returns the prompt tokens followed by the newly generated ones;
# drop the prompt so that only the model's answer is printed.
prompt_length = inputs["input_ids"].shape[-1]
print(processor.decode(outputs[0][prompt_length:], skip_special_tokens=True))

Run the model (Python, transformers)

from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
    
model_id = "utter-project/EuroVLM-9B-Preview"
# Fetch (or load from the local cache) the processor and the model weights.
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(model_id)

# Load an image. Image.open() is lazy, so open it in a `with` block and
# materialize an RGB copy before the underlying file is closed.
with Image.open("/path/to/image.jpg") as image_file:
    image = image_file.convert("RGB")

# One system turn plus one user turn that carries an image and a text request.
messages = [
    {
        "role": "system",
        "content": "You are EuroVLM --- a multimodal AI assistant specialized in European languages that provides safe, educational and helpful answers about images and text.",
    },
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": "What do you see in this image? Please describe it in Portuguese."},
        ],
    },
]

# Render the conversation to a prompt string that is ready for generation.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=1024)

# generate() returns the prompt tokens followed by the newly generated ones;
# drop the prompt so that only the model's answer is printed.
prompt_length = inputs["input_ids"].shape[-1]
print(processor.decode(outputs[0][prompt_length:], skip_special_tokens=True))

Run the model (Python, transformers)

from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
    
model_id = "utter-project/EuroVLM-9B-Preview"
# Fetch (or load from the local cache) the processor and the model weights.
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(model_id)

# Load an image. Image.open() is lazy, so open it in a `with` block and
# materialize an RGB copy before the underlying file is closed.
with Image.open("/path/to/image.jpg") as image_file:
    image = image_file.convert("RGB")

# One system turn plus one user turn that carries an image and a text request.
messages = [
    {
        "role": "system",
        "content": "You are EuroVLM --- a multimodal AI assistant specialized in European languages that provides safe, educational and helpful answers about images and text.",
    },
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": "What do you see in this image? Please describe it in Portuguese."},
        ],
    },
]

# Render the conversation to a prompt string that is ready for generation.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=1024)

# generate() returns the prompt tokens followed by the newly generated ones;
# drop the prompt so that only the model's answer is printed.
prompt_length = inputs["input_ids"].shape[-1]
print(processor.decode(outputs[0][prompt_length:], skip_special_tokens=True))

Run the model (Python, transformers)

from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
    
model_id = "utter-project/EuroVLM-9B-Preview"
# Fetch (or load from the local cache) the processor and the model weights.
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(model_id)

# Load an image. Image.open() is lazy, so open it in a `with` block and
# materialize an RGB copy before the underlying file is closed.
with Image.open("/path/to/image.jpg") as image_file:
    image = image_file.convert("RGB")

# One system turn plus one user turn that carries an image and a text request.
messages = [
    {
        "role": "system",
        "content": "You are EuroVLM --- a multimodal AI assistant specialized in European languages that provides safe, educational and helpful answers about images and text.",
    },
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": "What do you see in this image? Please describe it in Portuguese."},
        ],
    },
]

# Render the conversation to a prompt string that is ready for generation.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=1024)

# generate() returns the prompt tokens followed by the newly generated ones;
# drop the prompt so that only the model's answer is printed.
prompt_length = inputs["input_ids"].shape[-1]
print(processor.decode(outputs[0][prompt_length:], skip_special_tokens=True))

Run the model (Python, transformers)

from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
    
model_id = "utter-project/EuroVLM-9B-Preview"
# Fetch (or load from the local cache) the processor and the model weights.
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(model_id)

# Load an image. Image.open() is lazy, so open it in a `with` block and
# materialize an RGB copy before the underlying file is closed.
with Image.open("/path/to/image.jpg") as image_file:
    image = image_file.convert("RGB")

# One system turn plus one user turn that carries an image and a text request.
messages = [
    {
        "role": "system",
        "content": "You are EuroVLM --- a multimodal AI assistant specialized in European languages that provides safe, educational and helpful answers about images and text.",
    },
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": "What do you see in this image? Please describe it in Portuguese."},
        ],
    },
]

# Render the conversation to a prompt string that is ready for generation.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=1024)

# generate() returns the prompt tokens followed by the newly generated ones;
# drop the prompt so that only the model's answer is printed.
prompt_length = inputs["input_ids"].shape[-1]
print(processor.decode(outputs[0][prompt_length:], skip_special_tokens=True))

Run the model (Python, transformers)

from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
    
model_id = "utter-project/EuroVLM-9B-Preview"
# Fetch (or load from the local cache) the processor and the model weights.
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(model_id)

# Load an image. Image.open() is lazy, so open it in a `with` block and
# materialize an RGB copy before the underlying file is closed.
with Image.open("/path/to/image.jpg") as image_file:
    image = image_file.convert("RGB")

# One system turn plus one user turn that carries an image and a text request.
messages = [
    {
        "role": "system",
        "content": "You are EuroVLM --- a multimodal AI assistant specialized in European languages that provides safe, educational and helpful answers about images and text.",
    },
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": "What do you see in this image? Please describe it in Portuguese."},
        ],
    },
]

# Render the conversation to a prompt string that is ready for generation.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=1024)

# generate() returns the prompt tokens followed by the newly generated ones;
# drop the prompt so that only the model's answer is printed.
prompt_length = inputs["input_ids"].shape[-1]
print(processor.decode(outputs[0][prompt_length:], skip_special_tokens=True))

Run the model (Python, transformers)

from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
    
model_id = "utter-project/EuroVLM-9B-Preview"
# Fetch (or load from the local cache) the processor and the model weights.
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(model_id)

# Load an image. Image.open() is lazy, so open it in a `with` block and
# materialize an RGB copy before the underlying file is closed.
with Image.open("/path/to/image.jpg") as image_file:
    image = image_file.convert("RGB")

# One system turn plus one user turn that carries an image and a text request.
messages = [
    {
        "role": "system",
        "content": "You are EuroVLM --- a multimodal AI assistant specialized in European languages that provides safe, educational and helpful answers about images and text.",
    },
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": "What do you see in this image? Please describe it in Portuguese."},
        ],
    },
]

# Render the conversation to a prompt string that is ready for generation.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=1024)

# generate() returns the prompt tokens followed by the newly generated ones;
# drop the prompt so that only the model's answer is printed.
prompt_length = inputs["input_ids"].shape[-1]
print(processor.decode(outputs[0][prompt_length:], skip_special_tokens=True))

Run the model (Python, transformers)

from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
    
model_id = "utter-project/EuroVLM-9B-Preview"
# Fetch (or load from the local cache) the processor and the model weights.
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(model_id)

# Load an image. Image.open() is lazy, so open it in a `with` block and
# materialize an RGB copy before the underlying file is closed.
with Image.open("/path/to/image.jpg") as image_file:
    image = image_file.convert("RGB")

# One system turn plus one user turn that carries an image and a text request.
messages = [
    {
        "role": "system",
        "content": "You are EuroVLM --- a multimodal AI assistant specialized in European languages that provides safe, educational and helpful answers about images and text.",
    },
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": "What do you see in this image? Please describe it in Portuguese."},
        ],
    },
]

# Render the conversation to a prompt string that is ready for generation.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=1024)

# generate() returns the prompt tokens followed by the newly generated ones;
# drop the prompt so that only the model's answer is printed.
prompt_length = inputs["input_ids"].shape[-1]
print(processor.decode(outputs[0][prompt_length:], skip_special_tokens=True))

Run the model (Python, transformers)

from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
    
model_id = "utter-project/EuroVLM-9B-Preview"
# Fetch (or load from the local cache) the processor and the model weights.
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(model_id)

# Load an image. Image.open() is lazy, so open it in a `with` block and
# materialize an RGB copy before the underlying file is closed.
with Image.open("/path/to/image.jpg") as image_file:
    image = image_file.convert("RGB")

# One system turn plus one user turn that carries an image and a text request.
messages = [
    {
        "role": "system",
        "content": "You are EuroVLM --- a multimodal AI assistant specialized in European languages that provides safe, educational and helpful answers about images and text.",
    },
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": "What do you see in this image? Please describe it in Portuguese."},
        ],
    },
]

# Render the conversation to a prompt string that is ready for generation.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=1024)

# generate() returns the prompt tokens followed by the newly generated ones;
# drop the prompt so that only the model's answer is printed.
prompt_length = inputs["input_ids"].shape[-1]
print(processor.decode(outputs[0][prompt_length:], skip_special_tokens=True))

Run the model (Python, transformers)

from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
    
model_id = "utter-project/EuroVLM-9B-Preview"
# Fetch (or load from the local cache) the processor and the model weights.
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(model_id)

# Load an image. Image.open() is lazy, so open it in a `with` block and
# materialize an RGB copy before the underlying file is closed.
with Image.open("/path/to/image.jpg") as image_file:
    image = image_file.convert("RGB")

# One system turn plus one user turn that carries an image and a text request.
messages = [
    {
        "role": "system",
        "content": "You are EuroVLM --- a multimodal AI assistant specialized in European languages that provides safe, educational and helpful answers about images and text.",
    },
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": "What do you see in this image? Please describe it in Portuguese."},
        ],
    },
]

# Render the conversation to a prompt string that is ready for generation.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=1024)

# generate() returns the prompt tokens followed by the newly generated ones;
# drop the prompt so that only the model's answer is printed.
prompt_length = inputs["input_ids"].shape[-1]
print(processor.decode(outputs[0][prompt_length:], skip_special_tokens=True))

Deploy This Model

Production-ready deployment in minutes

Together.ai

Instant API access to this model

Fastest API

Production-ready inference API. Start free, scale to millions.

Replicate

One-click model deployment

Easiest Setup

Run models in the cloud with simple API. No DevOps required.

Disclosure: We may earn a commission from these partners. This helps keep LLMYourWay free.