EuroLLM-22B-Instruct-2512
6.8K
51
llama
by
utter-project
Language Model
OTHER
22B params
New
7K downloads
Early-stage
Edge AI:
Mobile
Laptop
Server
50GB+ RAM
Mobile
Laptop
Server
Quick Summary
AI model with specialized capabilities.
Device Compatibility
Mobile
4-6GB RAM
Laptop
16GB RAM
Server
GPU
Minimum Recommended
21GB+ RAM
Code Examples
sequence_parallel_degree: 4
yaml
# Axolotl supervised fine-tuning configuration: trains on
# utter-project/EuroBlocks-SFT-2512 starting from utter-project/EuroLLM-22B-2512.
#
# NOTE(review): the nesting below was reconstructed from text whose indentation
# had been flattened. Keys from `type` through `train_on_eos` are assumed to
# belong to the single dataset entry; `chat_template_jinja` and the `liger_*`
# switches are kept at top level. Confirm against the upstream config.

auto_resume_from_checkpoints: true
use_tensorboard: true

# Base model and loader classes.
base_model: utter-project/EuroLLM-22B-2512
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer

# No 8-bit / 4-bit quantized loading.
load_in_8bit: false
load_in_4bit: false
strict: false

dataset_processes: 64
datasets:
  - path: utter-project/EuroBlocks-SFT-2512
    type: chat_template
    split: train
    conversation: chatml
    # Each record keeps its turns under `conversations`; every turn carries
    # a `role` and a `content` field.
    field_messages: conversations
    message_field_role: role
    message_field_content: content
    roles_to_train: ["assistant"]
    train_on_eos: all

# ChatML-style template: every turn is wrapped in <|im_start|>ROLE ... <|im_end|>,
# and an assistant header is appended when add_generation_prompt is set.
chat_template_jinja: "{% for message in messages %}{% if message['role'] == 'assistant' %}{% set role = 'assistant' %}{% else %}{% set role = message['role'] %}{% endif %}<|im_start|>{{ role }}\n{{ message['content'] | trim }}<|im_end|>\n{% endfor %}{% if add_generation_prompt %}{{'<|im_start|>assistant\n'}}{% endif %}"

output_dir: checkpoints
# 0 = no validation split is held out.
val_set_size: 0

sequence_len: 32768
sample_packing: true
pad_to_sequence_len: true

# Sequence parallelism is left disabled.
# sequence_parallel_degree: 4
# heads_k_stride: 1
# ring_attn_func:

plugins:
  - axolotl.integrations.liger.LigerPlugin
liger_rope: true
liger_rms_norm: true
liger_glu_activation: true
liger_layer_norm: true
liger_fused_linear_cross_entropy: true

# N_GPUS * GRAD_ACC_STEPS * MICRO_BATCH_SIZE * SEQ_LEN = tokens/step ->
# Assuming 32 gpus (32 * 2 * 2 * 32768 = 4 194 304 tokens/step)
gradient_accumulation_steps: 2
micro_batch_size: 2
eval_batch_size: 1
num_epochs: 5

optimizer: adamw_torch
lr_scheduler: cosine
learning_rate: 1e-5

train_on_inputs: false
group_by_length: false

# bfloat16 training; fp16 and tf32 are switched off.
bf16: true
fp16: false
tf32: false

gradient_checkpointing: true
logging_steps: 1

# Flash attention is on, but its optional fused kernels are all off.
flash_attention: true
flash_attn_cross_entropy: false
flash_attn_rms_norm: false
flash_attn_fuse_qkv: false
flash_attn_fuse_mlp: false

warmup_steps: 125
eval_sample_packing: false

# Checkpoint every 500 steps, keeping at most 2 on disk.
save_steps: 500
save_total_limit: 2

deepspeed: deepspeed_configs/zero3_bf16.json
weight_decay: 0.01

special_tokens:
  eos_token: "<|im_end|>"
Deploy This Model
Production-ready deployment in minutes
Together.ai
Instant API access to this model
Production-ready inference API. Start free, scale to millions.
Try Free API
Replicate
One-click model deployment
Run models in the cloud with simple API. No DevOps required.
Deploy Now
Disclosure: We may earn a commission from these partners. This helps keep LLMYourWay free.