Qwen2.5-1.5B-THREADRIPPER-v0.1
2
1.5B
—
by
Xiaojian9992024
Language Model
OTHER
1.5B params
New
2 downloads
Early-stage
Edge AI:
Mobile
Laptop
Server
4GB+ RAM
Mobile
Laptop
Server
Quick Summary
This is a merge of pre-trained language models created using mergekit.
Device Compatibility
Mobile
4-6GB RAM
Laptop
16GB RAM
Server
GPU
Minimum Recommended
2GB+ RAM
Code Examples
Configuration (YAML)
# mergekit configuration for this merge (method: della_linear).
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been stripped; confirm against the original model card before reuse.
merge_method: della_linear
base_model: Qwen/Qwen2.5-1.5B-Instruct
dtype: bfloat16

# Global merge parameters.
parameters:
  epsilon: 0.02
  lambda: 1.65
  normalize: true

# Per-benchmark weighting.
# NOTE(review): smoothing_factor is assumed to be a sibling of task_weights.
adaptive_merge_parameters:
  task_weights:
    tinyArc: 2.2
    tinyHellaswag: 2.15
    tinyMMLU: 1.9
    tinyTruthfulQA: 2.0
    tinyTruthfulQA_mc1: 1.95
    tinyWinogrande: 1.85
    IFEval: 2.5  # Best stat, for instruction-following
    BBH: 2.2
    MATH: 2.3
    GPQA: 2.0
    MUSR: 2.0
    MMLU-PRO: 2.15
  smoothing_factor: 0.15

# Source models; every entry uses the same weight and density.
models:
  - model: Qwen/Qwen2.5-Coder-1.5B-Instruct
    parameters:
      weight: 1
      density: 1
  - model: Qwen/Qwen2.5-Math-1.5B-Instruct
    parameters:
      weight: 1
      density: 1
  - model: Qwen/Qwen2.5-1.5B-Instruct
    parameters:
      weight: 1
      density: 1
  - model: justinj92/Qwen2.5-1.5B-Thinking
    parameters:
      weight: 1
      density: 1
  - model: prithivMLmods/Bellatrix-Tiny-1.5B-R1
    parameters:
      weight: 1
      density: 1
Configuration (YAML)
merge_method: della_linear
base_model: Qwen/Qwen2.5-1.5B-Instruct
dtype: bfloat16
parameters:
epsilon: 0.02
lambda: 1.65
normalize: true
adaptive_merge_parameters:
task_weights:
tinyArc: 2.2
tinyHellaswag: 2.15
tinyMMLU: 1.9
tinyTruthfulQA: 2.0
tinyTruthfulQA_mc1: 1.95
tinyWinogrande: 1.85
IFEval: 2.5 # Best stat, for instruction-following
BBH: 2.2
MATH: 2.3
GPQA: 2.0
MUSR: 2.0
MMLU-PRO: 2.15
smoothing_factor: 0.15
models:
- model: Qwen/Qwen2.5-Coder-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-Math-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: justinj92/Qwen2.5-1.5B-Thinking
parameters:
weight: 1
density: 1
- model: prithivMLmods/Bellatrix-Tiny-1.5B-R1
parameters:
weight: 1
density: 1Configurationyaml
merge_method: della_linear
base_model: Qwen/Qwen2.5-1.5B-Instruct
dtype: bfloat16
parameters:
epsilon: 0.02
lambda: 1.65
normalize: true
adaptive_merge_parameters:
task_weights:
tinyArc: 2.2
tinyHellaswag: 2.15
tinyMMLU: 1.9
tinyTruthfulQA: 2.0
tinyTruthfulQA_mc1: 1.95
tinyWinogrande: 1.85
IFEval: 2.5 # Best stat, for instruction-following
BBH: 2.2
MATH: 2.3
GPQA: 2.0
MUSR: 2.0
MMLU-PRO: 2.15
smoothing_factor: 0.15
models:
- model: Qwen/Qwen2.5-Coder-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-Math-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: justinj92/Qwen2.5-1.5B-Thinking
parameters:
weight: 1
density: 1
- model: prithivMLmods/Bellatrix-Tiny-1.5B-R1
parameters:
weight: 1
density: 1Configurationyaml
merge_method: della_linear
base_model: Qwen/Qwen2.5-1.5B-Instruct
dtype: bfloat16
parameters:
epsilon: 0.02
lambda: 1.65
normalize: true
adaptive_merge_parameters:
task_weights:
tinyArc: 2.2
tinyHellaswag: 2.15
tinyMMLU: 1.9
tinyTruthfulQA: 2.0
tinyTruthfulQA_mc1: 1.95
tinyWinogrande: 1.85
IFEval: 2.5 # Best stat, for instruction-following
BBH: 2.2
MATH: 2.3
GPQA: 2.0
MUSR: 2.0
MMLU-PRO: 2.15
smoothing_factor: 0.15
models:
- model: Qwen/Qwen2.5-Coder-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-Math-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: justinj92/Qwen2.5-1.5B-Thinking
parameters:
weight: 1
density: 1
- model: prithivMLmods/Bellatrix-Tiny-1.5B-R1
parameters:
weight: 1
density: 1Configurationyaml
merge_method: della_linear
base_model: Qwen/Qwen2.5-1.5B-Instruct
dtype: bfloat16
parameters:
epsilon: 0.02
lambda: 1.65
normalize: true
adaptive_merge_parameters:
task_weights:
tinyArc: 2.2
tinyHellaswag: 2.15
tinyMMLU: 1.9
tinyTruthfulQA: 2.0
tinyTruthfulQA_mc1: 1.95
tinyWinogrande: 1.85
IFEval: 2.5 # Best stat, for instruction-following
BBH: 2.2
MATH: 2.3
GPQA: 2.0
MUSR: 2.0
MMLU-PRO: 2.15
smoothing_factor: 0.15
models:
- model: Qwen/Qwen2.5-Coder-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-Math-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: justinj92/Qwen2.5-1.5B-Thinking
parameters:
weight: 1
density: 1
- model: prithivMLmods/Bellatrix-Tiny-1.5B-R1
parameters:
weight: 1
density: 1Configurationyaml
merge_method: della_linear
base_model: Qwen/Qwen2.5-1.5B-Instruct
dtype: bfloat16
parameters:
epsilon: 0.02
lambda: 1.65
normalize: true
adaptive_merge_parameters:
task_weights:
tinyArc: 2.2
tinyHellaswag: 2.15
tinyMMLU: 1.9
tinyTruthfulQA: 2.0
tinyTruthfulQA_mc1: 1.95
tinyWinogrande: 1.85
IFEval: 2.5 # Best stat, for instruction-following
BBH: 2.2
MATH: 2.3
GPQA: 2.0
MUSR: 2.0
MMLU-PRO: 2.15
smoothing_factor: 0.15
models:
- model: Qwen/Qwen2.5-Coder-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-Math-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: justinj92/Qwen2.5-1.5B-Thinking
parameters:
weight: 1
density: 1
- model: prithivMLmods/Bellatrix-Tiny-1.5B-R1
parameters:
weight: 1
density: 1Configurationyaml
merge_method: della_linear
base_model: Qwen/Qwen2.5-1.5B-Instruct
dtype: bfloat16
parameters:
epsilon: 0.02
lambda: 1.65
normalize: true
adaptive_merge_parameters:
task_weights:
tinyArc: 2.2
tinyHellaswag: 2.15
tinyMMLU: 1.9
tinyTruthfulQA: 2.0
tinyTruthfulQA_mc1: 1.95
tinyWinogrande: 1.85
IFEval: 2.5 # Best stat, for instruction-following
BBH: 2.2
MATH: 2.3
GPQA: 2.0
MUSR: 2.0
MMLU-PRO: 2.15
smoothing_factor: 0.15
models:
- model: Qwen/Qwen2.5-Coder-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-Math-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: justinj92/Qwen2.5-1.5B-Thinking
parameters:
weight: 1
density: 1
- model: prithivMLmods/Bellatrix-Tiny-1.5B-R1
parameters:
weight: 1
density: 1Configurationyaml
merge_method: della_linear
base_model: Qwen/Qwen2.5-1.5B-Instruct
dtype: bfloat16
parameters:
epsilon: 0.02
lambda: 1.65
normalize: true
adaptive_merge_parameters:
task_weights:
tinyArc: 2.2
tinyHellaswag: 2.15
tinyMMLU: 1.9
tinyTruthfulQA: 2.0
tinyTruthfulQA_mc1: 1.95
tinyWinogrande: 1.85
IFEval: 2.5 # Best stat, for instruction-following
BBH: 2.2
MATH: 2.3
GPQA: 2.0
MUSR: 2.0
MMLU-PRO: 2.15
smoothing_factor: 0.15
models:
- model: Qwen/Qwen2.5-Coder-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-Math-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: justinj92/Qwen2.5-1.5B-Thinking
parameters:
weight: 1
density: 1
- model: prithivMLmods/Bellatrix-Tiny-1.5B-R1
parameters:
weight: 1
density: 1Configurationyaml
merge_method: della_linear
base_model: Qwen/Qwen2.5-1.5B-Instruct
dtype: bfloat16
parameters:
epsilon: 0.02
lambda: 1.65
normalize: true
adaptive_merge_parameters:
task_weights:
tinyArc: 2.2
tinyHellaswag: 2.15
tinyMMLU: 1.9
tinyTruthfulQA: 2.0
tinyTruthfulQA_mc1: 1.95
tinyWinogrande: 1.85
IFEval: 2.5 # Best stat, for instruction-following
BBH: 2.2
MATH: 2.3
GPQA: 2.0
MUSR: 2.0
MMLU-PRO: 2.15
smoothing_factor: 0.15
models:
- model: Qwen/Qwen2.5-Coder-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-Math-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: justinj92/Qwen2.5-1.5B-Thinking
parameters:
weight: 1
density: 1
- model: prithivMLmods/Bellatrix-Tiny-1.5B-R1
parameters:
weight: 1
density: 1Configurationyaml
merge_method: della_linear
base_model: Qwen/Qwen2.5-1.5B-Instruct
dtype: bfloat16
parameters:
epsilon: 0.02
lambda: 1.65
normalize: true
adaptive_merge_parameters:
task_weights:
tinyArc: 2.2
tinyHellaswag: 2.15
tinyMMLU: 1.9
tinyTruthfulQA: 2.0
tinyTruthfulQA_mc1: 1.95
tinyWinogrande: 1.85
IFEval: 2.5 # Best stat, for instruction-following
BBH: 2.2
MATH: 2.3
GPQA: 2.0
MUSR: 2.0
MMLU-PRO: 2.15
smoothing_factor: 0.15
models:
- model: Qwen/Qwen2.5-Coder-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-Math-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: justinj92/Qwen2.5-1.5B-Thinking
parameters:
weight: 1
density: 1
- model: prithivMLmods/Bellatrix-Tiny-1.5B-R1
parameters:
weight: 1
density: 1Configurationyaml
merge_method: della_linear
base_model: Qwen/Qwen2.5-1.5B-Instruct
dtype: bfloat16
parameters:
epsilon: 0.02
lambda: 1.65
normalize: true
adaptive_merge_parameters:
task_weights:
tinyArc: 2.2
tinyHellaswag: 2.15
tinyMMLU: 1.9
tinyTruthfulQA: 2.0
tinyTruthfulQA_mc1: 1.95
tinyWinogrande: 1.85
IFEval: 2.5 # Best stat, for instruction-following
BBH: 2.2
MATH: 2.3
GPQA: 2.0
MUSR: 2.0
MMLU-PRO: 2.15
smoothing_factor: 0.15
models:
- model: Qwen/Qwen2.5-Coder-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-Math-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: justinj92/Qwen2.5-1.5B-Thinking
parameters:
weight: 1
density: 1
- model: prithivMLmods/Bellatrix-Tiny-1.5B-R1
parameters:
weight: 1
density: 1Configurationyaml
merge_method: della_linear
base_model: Qwen/Qwen2.5-1.5B-Instruct
dtype: bfloat16
parameters:
epsilon: 0.02
lambda: 1.65
normalize: true
adaptive_merge_parameters:
task_weights:
tinyArc: 2.2
tinyHellaswag: 2.15
tinyMMLU: 1.9
tinyTruthfulQA: 2.0
tinyTruthfulQA_mc1: 1.95
tinyWinogrande: 1.85
IFEval: 2.5 # Best stat, for instruction-following
BBH: 2.2
MATH: 2.3
GPQA: 2.0
MUSR: 2.0
MMLU-PRO: 2.15
smoothing_factor: 0.15
models:
- model: Qwen/Qwen2.5-Coder-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-Math-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: justinj92/Qwen2.5-1.5B-Thinking
parameters:
weight: 1
density: 1
- model: prithivMLmods/Bellatrix-Tiny-1.5B-R1
parameters:
weight: 1
density: 1Configurationyaml
merge_method: della_linear
base_model: Qwen/Qwen2.5-1.5B-Instruct
dtype: bfloat16
parameters:
epsilon: 0.02
lambda: 1.65
normalize: true
adaptive_merge_parameters:
task_weights:
tinyArc: 2.2
tinyHellaswag: 2.15
tinyMMLU: 1.9
tinyTruthfulQA: 2.0
tinyTruthfulQA_mc1: 1.95
tinyWinogrande: 1.85
IFEval: 2.5 # Best stat, for instruction-following
BBH: 2.2
MATH: 2.3
GPQA: 2.0
MUSR: 2.0
MMLU-PRO: 2.15
smoothing_factor: 0.15
models:
- model: Qwen/Qwen2.5-Coder-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-Math-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: justinj92/Qwen2.5-1.5B-Thinking
parameters:
weight: 1
density: 1
- model: prithivMLmods/Bellatrix-Tiny-1.5B-R1
parameters:
weight: 1
density: 1Configurationyaml
merge_method: della_linear
base_model: Qwen/Qwen2.5-1.5B-Instruct
dtype: bfloat16
parameters:
epsilon: 0.02
lambda: 1.65
normalize: true
adaptive_merge_parameters:
task_weights:
tinyArc: 2.2
tinyHellaswag: 2.15
tinyMMLU: 1.9
tinyTruthfulQA: 2.0
tinyTruthfulQA_mc1: 1.95
tinyWinogrande: 1.85
IFEval: 2.5 # Best stat, for instruction-following
BBH: 2.2
MATH: 2.3
GPQA: 2.0
MUSR: 2.0
MMLU-PRO: 2.15
smoothing_factor: 0.15
models:
- model: Qwen/Qwen2.5-Coder-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-Math-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: justinj92/Qwen2.5-1.5B-Thinking
parameters:
weight: 1
density: 1
- model: prithivMLmods/Bellatrix-Tiny-1.5B-R1
parameters:
weight: 1
density: 1Configurationyaml
merge_method: della_linear
base_model: Qwen/Qwen2.5-1.5B-Instruct
dtype: bfloat16
parameters:
epsilon: 0.02
lambda: 1.65
normalize: true
adaptive_merge_parameters:
task_weights:
tinyArc: 2.2
tinyHellaswag: 2.15
tinyMMLU: 1.9
tinyTruthfulQA: 2.0
tinyTruthfulQA_mc1: 1.95
tinyWinogrande: 1.85
IFEval: 2.5 # Best stat, for instruction-following
BBH: 2.2
MATH: 2.3
GPQA: 2.0
MUSR: 2.0
MMLU-PRO: 2.15
smoothing_factor: 0.15
models:
- model: Qwen/Qwen2.5-Coder-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-Math-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: justinj92/Qwen2.5-1.5B-Thinking
parameters:
weight: 1
density: 1
- model: prithivMLmods/Bellatrix-Tiny-1.5B-R1
parameters:
weight: 1
density: 1Configurationyaml
merge_method: della_linear
base_model: Qwen/Qwen2.5-1.5B-Instruct
dtype: bfloat16
parameters:
epsilon: 0.02
lambda: 1.65
normalize: true
adaptive_merge_parameters:
task_weights:
tinyArc: 2.2
tinyHellaswag: 2.15
tinyMMLU: 1.9
tinyTruthfulQA: 2.0
tinyTruthfulQA_mc1: 1.95
tinyWinogrande: 1.85
IFEval: 2.5 # Best stat, for instruction-following
BBH: 2.2
MATH: 2.3
GPQA: 2.0
MUSR: 2.0
MMLU-PRO: 2.15
smoothing_factor: 0.15
models:
- model: Qwen/Qwen2.5-Coder-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-Math-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: justinj92/Qwen2.5-1.5B-Thinking
parameters:
weight: 1
density: 1
- model: prithivMLmods/Bellatrix-Tiny-1.5B-R1
parameters:
weight: 1
density: 1Configurationyaml
merge_method: della_linear
base_model: Qwen/Qwen2.5-1.5B-Instruct
dtype: bfloat16
parameters:
epsilon: 0.02
lambda: 1.65
normalize: true
adaptive_merge_parameters:
task_weights:
tinyArc: 2.2
tinyHellaswag: 2.15
tinyMMLU: 1.9
tinyTruthfulQA: 2.0
tinyTruthfulQA_mc1: 1.95
tinyWinogrande: 1.85
IFEval: 2.5 # Best stat, for instruction-following
BBH: 2.2
MATH: 2.3
GPQA: 2.0
MUSR: 2.0
MMLU-PRO: 2.15
smoothing_factor: 0.15
models:
- model: Qwen/Qwen2.5-Coder-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-Math-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: justinj92/Qwen2.5-1.5B-Thinking
parameters:
weight: 1
density: 1
- model: prithivMLmods/Bellatrix-Tiny-1.5B-R1
parameters:
weight: 1
density: 1Configurationyaml
merge_method: della_linear
base_model: Qwen/Qwen2.5-1.5B-Instruct
dtype: bfloat16
parameters:
epsilon: 0.02
lambda: 1.65
normalize: true
adaptive_merge_parameters:
task_weights:
tinyArc: 2.2
tinyHellaswag: 2.15
tinyMMLU: 1.9
tinyTruthfulQA: 2.0
tinyTruthfulQA_mc1: 1.95
tinyWinogrande: 1.85
IFEval: 2.5 # Best stat, for instruction-following
BBH: 2.2
MATH: 2.3
GPQA: 2.0
MUSR: 2.0
MMLU-PRO: 2.15
smoothing_factor: 0.15
models:
- model: Qwen/Qwen2.5-Coder-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-Math-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: justinj92/Qwen2.5-1.5B-Thinking
parameters:
weight: 1
density: 1
- model: prithivMLmods/Bellatrix-Tiny-1.5B-R1
parameters:
weight: 1
density: 1Configurationyaml
merge_method: della_linear
base_model: Qwen/Qwen2.5-1.5B-Instruct
dtype: bfloat16
parameters:
epsilon: 0.02
lambda: 1.65
normalize: true
adaptive_merge_parameters:
task_weights:
tinyArc: 2.2
tinyHellaswag: 2.15
tinyMMLU: 1.9
tinyTruthfulQA: 2.0
tinyTruthfulQA_mc1: 1.95
tinyWinogrande: 1.85
IFEval: 2.5 # Best stat, for instruction-following
BBH: 2.2
MATH: 2.3
GPQA: 2.0
MUSR: 2.0
MMLU-PRO: 2.15
smoothing_factor: 0.15
models:
- model: Qwen/Qwen2.5-Coder-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-Math-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: justinj92/Qwen2.5-1.5B-Thinking
parameters:
weight: 1
density: 1
- model: prithivMLmods/Bellatrix-Tiny-1.5B-R1
parameters:
weight: 1
density: 1Configurationyaml
merge_method: della_linear
base_model: Qwen/Qwen2.5-1.5B-Instruct
dtype: bfloat16
parameters:
epsilon: 0.02
lambda: 1.65
normalize: true
adaptive_merge_parameters:
task_weights:
tinyArc: 2.2
tinyHellaswag: 2.15
tinyMMLU: 1.9
tinyTruthfulQA: 2.0
tinyTruthfulQA_mc1: 1.95
tinyWinogrande: 1.85
IFEval: 2.5 # Best stat, for instruction-following
BBH: 2.2
MATH: 2.3
GPQA: 2.0
MUSR: 2.0
MMLU-PRO: 2.15
smoothing_factor: 0.15
models:
- model: Qwen/Qwen2.5-Coder-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-Math-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: justinj92/Qwen2.5-1.5B-Thinking
parameters:
weight: 1
density: 1
- model: prithivMLmods/Bellatrix-Tiny-1.5B-R1
parameters:
weight: 1
density: 1Configurationyaml
merge_method: della_linear
base_model: Qwen/Qwen2.5-1.5B-Instruct
dtype: bfloat16
parameters:
epsilon: 0.02
lambda: 1.65
normalize: true
adaptive_merge_parameters:
task_weights:
tinyArc: 2.2
tinyHellaswag: 2.15
tinyMMLU: 1.9
tinyTruthfulQA: 2.0
tinyTruthfulQA_mc1: 1.95
tinyWinogrande: 1.85
IFEval: 2.5 # Best stat, for instruction-following
BBH: 2.2
MATH: 2.3
GPQA: 2.0
MUSR: 2.0
MMLU-PRO: 2.15
smoothing_factor: 0.15
models:
- model: Qwen/Qwen2.5-Coder-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-Math-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: justinj92/Qwen2.5-1.5B-Thinking
parameters:
weight: 1
density: 1
- model: prithivMLmods/Bellatrix-Tiny-1.5B-R1
parameters:
weight: 1
density: 1Configurationyaml
merge_method: della_linear
base_model: Qwen/Qwen2.5-1.5B-Instruct
dtype: bfloat16
parameters:
epsilon: 0.02
lambda: 1.65
normalize: true
adaptive_merge_parameters:
task_weights:
tinyArc: 2.2
tinyHellaswag: 2.15
tinyMMLU: 1.9
tinyTruthfulQA: 2.0
tinyTruthfulQA_mc1: 1.95
tinyWinogrande: 1.85
IFEval: 2.5 # Best stat, for instruction-following
BBH: 2.2
MATH: 2.3
GPQA: 2.0
MUSR: 2.0
MMLU-PRO: 2.15
smoothing_factor: 0.15
models:
- model: Qwen/Qwen2.5-Coder-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-Math-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: justinj92/Qwen2.5-1.5B-Thinking
parameters:
weight: 1
density: 1
- model: prithivMLmods/Bellatrix-Tiny-1.5B-R1
parameters:
weight: 1
density: 1Configurationyaml
merge_method: della_linear
base_model: Qwen/Qwen2.5-1.5B-Instruct
dtype: bfloat16
parameters:
epsilon: 0.02
lambda: 1.65
normalize: true
adaptive_merge_parameters:
task_weights:
tinyArc: 2.2
tinyHellaswag: 2.15
tinyMMLU: 1.9
tinyTruthfulQA: 2.0
tinyTruthfulQA_mc1: 1.95
tinyWinogrande: 1.85
IFEval: 2.5 # Best stat, for instruction-following
BBH: 2.2
MATH: 2.3
GPQA: 2.0
MUSR: 2.0
MMLU-PRO: 2.15
smoothing_factor: 0.15
models:
- model: Qwen/Qwen2.5-Coder-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-Math-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: justinj92/Qwen2.5-1.5B-Thinking
parameters:
weight: 1
density: 1
- model: prithivMLmods/Bellatrix-Tiny-1.5B-R1
parameters:
weight: 1
density: 1Configurationyaml
merge_method: della_linear
base_model: Qwen/Qwen2.5-1.5B-Instruct
dtype: bfloat16
parameters:
epsilon: 0.02
lambda: 1.65
normalize: true
adaptive_merge_parameters:
task_weights:
tinyArc: 2.2
tinyHellaswag: 2.15
tinyMMLU: 1.9
tinyTruthfulQA: 2.0
tinyTruthfulQA_mc1: 1.95
tinyWinogrande: 1.85
IFEval: 2.5 # Best stat, for instruction-following
BBH: 2.2
MATH: 2.3
GPQA: 2.0
MUSR: 2.0
MMLU-PRO: 2.15
smoothing_factor: 0.15
models:
- model: Qwen/Qwen2.5-Coder-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-Math-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: justinj92/Qwen2.5-1.5B-Thinking
parameters:
weight: 1
density: 1
- model: prithivMLmods/Bellatrix-Tiny-1.5B-R1
parameters:
weight: 1
density: 1Configurationyaml
merge_method: della_linear
base_model: Qwen/Qwen2.5-1.5B-Instruct
dtype: bfloat16
parameters:
epsilon: 0.02
lambda: 1.65
normalize: true
adaptive_merge_parameters:
task_weights:
tinyArc: 2.2
tinyHellaswag: 2.15
tinyMMLU: 1.9
tinyTruthfulQA: 2.0
tinyTruthfulQA_mc1: 1.95
tinyWinogrande: 1.85
IFEval: 2.5 # Best stat, for instruction-following
BBH: 2.2
MATH: 2.3
GPQA: 2.0
MUSR: 2.0
MMLU-PRO: 2.15
smoothing_factor: 0.15
models:
- model: Qwen/Qwen2.5-Coder-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-Math-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: justinj92/Qwen2.5-1.5B-Thinking
parameters:
weight: 1
density: 1
- model: prithivMLmods/Bellatrix-Tiny-1.5B-R1
parameters:
weight: 1
density: 1Configurationyaml
merge_method: della_linear
base_model: Qwen/Qwen2.5-1.5B-Instruct
dtype: bfloat16
parameters:
epsilon: 0.02
lambda: 1.65
normalize: true
adaptive_merge_parameters:
task_weights:
tinyArc: 2.2
tinyHellaswag: 2.15
tinyMMLU: 1.9
tinyTruthfulQA: 2.0
tinyTruthfulQA_mc1: 1.95
tinyWinogrande: 1.85
IFEval: 2.5 # Best stat, for instruction-following
BBH: 2.2
MATH: 2.3
GPQA: 2.0
MUSR: 2.0
MMLU-PRO: 2.15
smoothing_factor: 0.15
models:
- model: Qwen/Qwen2.5-Coder-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-Math-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: justinj92/Qwen2.5-1.5B-Thinking
parameters:
weight: 1
density: 1
- model: prithivMLmods/Bellatrix-Tiny-1.5B-R1
parameters:
weight: 1
density: 1Configurationyaml
merge_method: della_linear
base_model: Qwen/Qwen2.5-1.5B-Instruct
dtype: bfloat16
parameters:
epsilon: 0.02
lambda: 1.65
normalize: true
adaptive_merge_parameters:
task_weights:
tinyArc: 2.2
tinyHellaswag: 2.15
tinyMMLU: 1.9
tinyTruthfulQA: 2.0
tinyTruthfulQA_mc1: 1.95
tinyWinogrande: 1.85
IFEval: 2.5 # Best stat, for instruction-following
BBH: 2.2
MATH: 2.3
GPQA: 2.0
MUSR: 2.0
MMLU-PRO: 2.15
smoothing_factor: 0.15
models:
- model: Qwen/Qwen2.5-Coder-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-Math-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: justinj92/Qwen2.5-1.5B-Thinking
parameters:
weight: 1
density: 1
- model: prithivMLmods/Bellatrix-Tiny-1.5B-R1
parameters:
weight: 1
density: 1Configurationyaml
merge_method: della_linear
base_model: Qwen/Qwen2.5-1.5B-Instruct
dtype: bfloat16
parameters:
epsilon: 0.02
lambda: 1.65
normalize: true
adaptive_merge_parameters:
task_weights:
tinyArc: 2.2
tinyHellaswag: 2.15
tinyMMLU: 1.9
tinyTruthfulQA: 2.0
tinyTruthfulQA_mc1: 1.95
tinyWinogrande: 1.85
IFEval: 2.5 # Best stat, for instruction-following
BBH: 2.2
MATH: 2.3
GPQA: 2.0
MUSR: 2.0
MMLU-PRO: 2.15
smoothing_factor: 0.15
models:
- model: Qwen/Qwen2.5-Coder-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-Math-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: justinj92/Qwen2.5-1.5B-Thinking
parameters:
weight: 1
density: 1
- model: prithivMLmods/Bellatrix-Tiny-1.5B-R1
parameters:
weight: 1
density: 1Configurationyaml
merge_method: della_linear
base_model: Qwen/Qwen2.5-1.5B-Instruct
dtype: bfloat16
parameters:
epsilon: 0.02
lambda: 1.65
normalize: true
adaptive_merge_parameters:
task_weights:
tinyArc: 2.2
tinyHellaswag: 2.15
tinyMMLU: 1.9
tinyTruthfulQA: 2.0
tinyTruthfulQA_mc1: 1.95
tinyWinogrande: 1.85
IFEval: 2.5 # Best stat, for instruction-following
BBH: 2.2
MATH: 2.3
GPQA: 2.0
MUSR: 2.0
MMLU-PRO: 2.15
smoothing_factor: 0.15
models:
- model: Qwen/Qwen2.5-Coder-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-Math-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: justinj92/Qwen2.5-1.5B-Thinking
parameters:
weight: 1
density: 1
- model: prithivMLmods/Bellatrix-Tiny-1.5B-R1
parameters:
weight: 1
density: 1Configurationyaml
merge_method: della_linear
base_model: Qwen/Qwen2.5-1.5B-Instruct
dtype: bfloat16
parameters:
epsilon: 0.02
lambda: 1.65
normalize: true
adaptive_merge_parameters:
task_weights:
tinyArc: 2.2
tinyHellaswag: 2.15
tinyMMLU: 1.9
tinyTruthfulQA: 2.0
tinyTruthfulQA_mc1: 1.95
tinyWinogrande: 1.85
IFEval: 2.5 # Best stat, for instruction-following
BBH: 2.2
MATH: 2.3
GPQA: 2.0
MUSR: 2.0
MMLU-PRO: 2.15
smoothing_factor: 0.15
models:
- model: Qwen/Qwen2.5-Coder-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-Math-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: justinj92/Qwen2.5-1.5B-Thinking
parameters:
weight: 1
density: 1
- model: prithivMLmods/Bellatrix-Tiny-1.5B-R1
parameters:
weight: 1
density: 1Configurationyaml
merge_method: della_linear
base_model: Qwen/Qwen2.5-1.5B-Instruct
dtype: bfloat16
parameters:
epsilon: 0.02
lambda: 1.65
normalize: true
adaptive_merge_parameters:
task_weights:
tinyArc: 2.2
tinyHellaswag: 2.15
tinyMMLU: 1.9
tinyTruthfulQA: 2.0
tinyTruthfulQA_mc1: 1.95
tinyWinogrande: 1.85
IFEval: 2.5 # Best stat, for instruction-following
BBH: 2.2
MATH: 2.3
GPQA: 2.0
MUSR: 2.0
MMLU-PRO: 2.15
smoothing_factor: 0.15
models:
- model: Qwen/Qwen2.5-Coder-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-Math-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: justinj92/Qwen2.5-1.5B-Thinking
parameters:
weight: 1
density: 1
- model: prithivMLmods/Bellatrix-Tiny-1.5B-R1
parameters:
weight: 1
density: 1Configurationyaml
merge_method: della_linear
base_model: Qwen/Qwen2.5-1.5B-Instruct
dtype: bfloat16
parameters:
epsilon: 0.02
lambda: 1.65
normalize: true
adaptive_merge_parameters:
task_weights:
tinyArc: 2.2
tinyHellaswag: 2.15
tinyMMLU: 1.9
tinyTruthfulQA: 2.0
tinyTruthfulQA_mc1: 1.95
tinyWinogrande: 1.85
IFEval: 2.5 # Best stat, for instruction-following
BBH: 2.2
MATH: 2.3
GPQA: 2.0
MUSR: 2.0
MMLU-PRO: 2.15
smoothing_factor: 0.15
models:
- model: Qwen/Qwen2.5-Coder-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-Math-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: justinj92/Qwen2.5-1.5B-Thinking
parameters:
weight: 1
density: 1
- model: prithivMLmods/Bellatrix-Tiny-1.5B-R1
parameters:
weight: 1
density: 1Configurationyaml
merge_method: della_linear
base_model: Qwen/Qwen2.5-1.5B-Instruct
dtype: bfloat16
parameters:
epsilon: 0.02
lambda: 1.65
normalize: true
adaptive_merge_parameters:
task_weights:
tinyArc: 2.2
tinyHellaswag: 2.15
tinyMMLU: 1.9
tinyTruthfulQA: 2.0
tinyTruthfulQA_mc1: 1.95
tinyWinogrande: 1.85
IFEval: 2.5 # Best stat, for instruction-following
BBH: 2.2
MATH: 2.3
GPQA: 2.0
MUSR: 2.0
MMLU-PRO: 2.15
smoothing_factor: 0.15
models:
- model: Qwen/Qwen2.5-Coder-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-Math-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: justinj92/Qwen2.5-1.5B-Thinking
parameters:
weight: 1
density: 1
- model: prithivMLmods/Bellatrix-Tiny-1.5B-R1
parameters:
weight: 1
density: 1Configurationyaml
merge_method: della_linear
base_model: Qwen/Qwen2.5-1.5B-Instruct
dtype: bfloat16
parameters:
epsilon: 0.02
lambda: 1.65
normalize: true
adaptive_merge_parameters:
task_weights:
tinyArc: 2.2
tinyHellaswag: 2.15
tinyMMLU: 1.9
tinyTruthfulQA: 2.0
tinyTruthfulQA_mc1: 1.95
tinyWinogrande: 1.85
IFEval: 2.5 # Best stat, for instruction-following
BBH: 2.2
MATH: 2.3
GPQA: 2.0
MUSR: 2.0
MMLU-PRO: 2.15
smoothing_factor: 0.15
models:
- model: Qwen/Qwen2.5-Coder-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-Math-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: Qwen/Qwen2.5-1.5B-Instruct
parameters:
weight: 1
density: 1
- model: justinj92/Qwen2.5-1.5B-Thinking
parameters:
weight: 1
density: 1
- model: prithivMLmods/Bellatrix-Tiny-1.5B-R1
parameters:
weight: 1
density: 1
Deploy This Model
Production-ready deployment in minutes
Together.ai
Instant API access to this model
Production-ready inference API. Start free, scale to millions.
Try Free API
Replicate
One-click model deployment
Run models in the cloud with simple API. No DevOps required.
Deploy Now
Disclosure: We may earn a commission from these partners. This helps keep LLMYourWay free.