EuroLLM-22B-Instruct-2512

6.8K
51
llama
by
utter-project
Language Model
OTHER
22B params
New
7K downloads
Early-stage
Edge AI:
Mobile
Laptop
Server
50GB+ RAM
Mobile
Laptop
Server
Quick Summary

EuroLLM-22B-Instruct-2512 is a 22B-parameter language model published by utter-project.

Device Compatibility

Mobile
4-6GB RAM
Laptop
16GB RAM
Server
GPU
Minimum Recommended
21GB+ RAM

Code Examples

# Run-level settings.
# NOTE(review): this value previously read "4yaml" (apparently a stray `yaml`
# language tag fused onto the number), which is not a valid integer. The same
# key also appears commented out further down, next to the heads_k_stride /
# ring_attn_func options -- confirm that it is meant to be enabled here.
sequence_parallel_degree: 4
auto_resume_from_checkpoints: true
use_tensorboard: true

# Base checkpoint and the model / tokenizer type names.
base_model: 'utter-project/EuroLLM-22B-2512'
model_type: 'AutoModelForCausalLM'
tokenizer_type: 'AutoTokenizer'

# Both quantised-loading switches are off; `strict` is off as well.
strict: false
load_in_4bit: false
load_in_8bit: false

# Training data: a single dataset entry, read from its `train` split.
dataset_processes: 64
datasets:
  - path: 'utter-project/EuroBlocks-SFT-2512'
    split: train
    type: chat_template
    conversation: chatml
    # Field holding the message list, and the per-message role/content keys.
    field_messages: conversations
    message_field_role: role
    message_field_content: content
    # Only the assistant role is listed as a training target.
    roles_to_train:
      - assistant
    train_on_eos: all


# Jinja chat template. Each message is rendered as
#   <|im_start|>{role}\n{content, trimmed}<|im_end|>\n
# and, when `add_generation_prompt` is set, '<|im_start|>assistant\n' is
# appended after the last message. The if/else on the role leaves it
# unchanged: 'assistant' stays 'assistant', any other role is used as-is.
# The value is double-quoted, so YAML turns each `\n` into a real newline
# before Jinja sees the template.
chat_template_jinja: "{% for message in messages %}{% if message['role'] == 'assistant' %}{% set role = 'assistant' %}{% else %}{% set role = message['role'] %}{% endif %}<|im_start|>{{ role }}\n{{ message['content'] | trim }}<|im_end|>\n{% endfor %}{% if add_generation_prompt %}{{'<|im_start|>assistant\n'}}{% endif %}"
 
# Output location; the validation set size is 0.
output_dir: 'checkpoints'
val_set_size: 0

# 32768-token sequences, with sample packing and pad-to-length both enabled.
sample_packing: true
pad_to_sequence_len: true
sequence_len: 32768

# sequence_parallel_degree: 4
# heads_k_stride: 1
# ring_attn_func:

# Liger integration: the plugin is registered and every liger_* switch is on.
plugins:
  - 'axolotl.integrations.liger.LigerPlugin'
liger_fused_linear_cross_entropy: true
liger_glu_activation: true
liger_layer_norm: true
liger_rms_norm: true
liger_rope: true

# tokens/step = N_GPUS * GRAD_ACC_STEPS * MICRO_BATCH_SIZE * SEQ_LEN
# Assuming 32 GPUs and the configured sequence_len of 32768:
#   32 * 2 * 2 * 32768 = 4 194 304 tokens/step
# (the rounder figure 4 096 000 results from taking 32k as 32 000).
gradient_accumulation_steps: 2
micro_batch_size: 2

eval_batch_size: 1
num_epochs: 5
optimizer: adamw_torch
lr_scheduler: cosine
# Written with a dot in the mantissa so that YAML 1.1 loaders (e.g. PyYAML)
# resolve it as a float; the bare form `1e-5` is loaded as a string there.
learning_rate: 1.0e-5

# Training on inputs and grouping by length are both disabled.
group_by_length: false
train_on_inputs: false

# Precision flags: bf16 on, fp16 and tf32 off.
bf16: true
fp16: false
tf32: false

# Gradient checkpointing on; log on every step.
gradient_checkpointing: true
logging_steps: 1

# Flash attention is on; the additional flash_attn_* switches are all off.
flash_attention: true
flash_attn_fuse_mlp: false
flash_attn_fuse_qkv: false
flash_attn_rms_norm: false
flash_attn_cross_entropy: false

# Warm-up length, in steps.
warmup_steps: 125
# Canonical lowercase boolean, matching every other flag in this file.
eval_sample_packing: false
# Checkpoint every 500 steps; save_total_limit is 2.
save_steps: 500
save_total_limit: 2
# Path to the DeepSpeed JSON config used for this run.
deepspeed: deepspeed_configs/zero3_bf16.json
weight_decay: 0.01

# EOS is the same <|im_end|> marker the chat template emits after each message.
special_tokens:
  eos_token: "<|im_end|>"

Deploy This Model

Production-ready deployment in minutes

Together.ai

Instant API access to this model

Fastest API

Production-ready inference API. Start free, scale to millions.

Try Free API

Replicate

One-click model deployment

Easiest Setup

Run models in the cloud with simple API. No DevOps required.

Deploy Now

Disclosure: We may earn a commission from these partners. This helps keep LLMYourWay free.