MS3-test-Merge-1
3
—
by
Nohobby
Language Model
OTHER
24B params
New
3 downloads
Early-stage
Edge AI:
Mobile
Laptop
Server
54GB+ RAM
Mobile
Laptop
Server
Quick Summary
I hadn't tried the untuned MS3 before messing around with the merge.
Device Compatibility
Mobile
4-6GB RAM
Laptop
16GB RAM
Server
GPU
Minimum Recommended
23GB+ RAM
Code Examples
Merge Details (yaml)
models:
- model: unsloth/Mistral-Small-24B-Base-2501
- model: unsloth/Mistral-Small-24B-Instruct-2501+ToastyPigeon/new-ms-rp-test-ws
parameters:
select_topk:
- value: [0.05, 0.03, 0.02, 0.02, 0.01]
- model: unsloth/Mistral-Small-24B-Instruct-2501+estrogen/MS2501-24b-Ink-ep2-adpt
parameters:
select_topk: 0.1
- model: trashpanda-org/MS-24B-Instruct-Mullein-v0
parameters:
select_topk: 0.4
base_model: unsloth/Mistral-Small-24B-Base-2501
merge_method: sce
parameters:
int8_mask: true
rescale: true
normalize: true
dtype: bfloat16
tokenizer_source: base
Merge Details (yaml)
models:
- model: unsloth/Mistral-Small-24B-Base-2501
- model: unsloth/Mistral-Small-24B-Instruct-2501+ToastyPigeon/new-ms-rp-test-ws
parameters:
select_topk:
- value: [0.05, 0.03, 0.02, 0.02, 0.01]
- model: unsloth/Mistral-Small-24B-Instruct-2501+estrogen/MS2501-24b-Ink-ep2-adpt
parameters:
select_topk: 0.1
- model: trashpanda-org/MS-24B-Instruct-Mullein-v0
parameters:
select_topk: 0.4
base_model: unsloth/Mistral-Small-24B-Base-2501
merge_method: sce
parameters:
int8_mask: true
rescale: true
normalize: true
dtype: bfloat16
tokenizer_source: baseMerge Detailsyaml
models:
- model: unsloth/Mistral-Small-24B-Base-2501
- model: unsloth/Mistral-Small-24B-Instruct-2501+ToastyPigeon/new-ms-rp-test-ws
parameters:
select_topk:
- value: [0.05, 0.03, 0.02, 0.02, 0.01]
- model: unsloth/Mistral-Small-24B-Instruct-2501+estrogen/MS2501-24b-Ink-ep2-adpt
parameters:
select_topk: 0.1
- model: trashpanda-org/MS-24B-Instruct-Mullein-v0
parameters:
select_topk: 0.4
base_model: unsloth/Mistral-Small-24B-Base-2501
merge_method: sce
parameters:
int8_mask: true
rescale: true
normalize: true
dtype: bfloat16
tokenizer_source: baseMerge Detailsyaml
models:
- model: unsloth/Mistral-Small-24B-Base-2501
- model: unsloth/Mistral-Small-24B-Instruct-2501+ToastyPigeon/new-ms-rp-test-ws
parameters:
select_topk:
- value: [0.05, 0.03, 0.02, 0.02, 0.01]
- model: unsloth/Mistral-Small-24B-Instruct-2501+estrogen/MS2501-24b-Ink-ep2-adpt
parameters:
select_topk: 0.1
- model: trashpanda-org/MS-24B-Instruct-Mullein-v0
parameters:
select_topk: 0.4
base_model: unsloth/Mistral-Small-24B-Base-2501
merge_method: sce
parameters:
int8_mask: true
rescale: true
normalize: true
dtype: bfloat16
tokenizer_source: baseMerge Detailsyaml
models:
- model: unsloth/Mistral-Small-24B-Base-2501
- model: unsloth/Mistral-Small-24B-Instruct-2501+ToastyPigeon/new-ms-rp-test-ws
parameters:
select_topk:
- value: [0.05, 0.03, 0.02, 0.02, 0.01]
- model: unsloth/Mistral-Small-24B-Instruct-2501+estrogen/MS2501-24b-Ink-ep2-adpt
parameters:
select_topk: 0.1
- model: trashpanda-org/MS-24B-Instruct-Mullein-v0
parameters:
select_topk: 0.4
base_model: unsloth/Mistral-Small-24B-Base-2501
merge_method: sce
parameters:
int8_mask: true
rescale: true
normalize: true
dtype: bfloat16
tokenizer_source: baseMerge Detailsyaml
models:
- model: unsloth/Mistral-Small-24B-Base-2501
- model: unsloth/Mistral-Small-24B-Instruct-2501+ToastyPigeon/new-ms-rp-test-ws
parameters:
select_topk:
- value: [0.05, 0.03, 0.02, 0.02, 0.01]
- model: unsloth/Mistral-Small-24B-Instruct-2501+estrogen/MS2501-24b-Ink-ep2-adpt
parameters:
select_topk: 0.1
- model: trashpanda-org/MS-24B-Instruct-Mullein-v0
parameters:
select_topk: 0.4
base_model: unsloth/Mistral-Small-24B-Base-2501
merge_method: sce
parameters:
int8_mask: true
rescale: true
normalize: true
dtype: bfloat16
tokenizer_source: baseMerge Detailsyaml
models:
- model: unsloth/Mistral-Small-24B-Base-2501
- model: unsloth/Mistral-Small-24B-Instruct-2501+ToastyPigeon/new-ms-rp-test-ws
parameters:
select_topk:
- value: [0.05, 0.03, 0.02, 0.02, 0.01]
- model: unsloth/Mistral-Small-24B-Instruct-2501+estrogen/MS2501-24b-Ink-ep2-adpt
parameters:
select_topk: 0.1
- model: trashpanda-org/MS-24B-Instruct-Mullein-v0
parameters:
select_topk: 0.4
base_model: unsloth/Mistral-Small-24B-Base-2501
merge_method: sce
parameters:
int8_mask: true
rescale: true
normalize: true
dtype: bfloat16
tokenizer_source: baseMerge Detailsyaml
models:
- model: unsloth/Mistral-Small-24B-Base-2501
- model: unsloth/Mistral-Small-24B-Instruct-2501+ToastyPigeon/new-ms-rp-test-ws
parameters:
select_topk:
- value: [0.05, 0.03, 0.02, 0.02, 0.01]
- model: unsloth/Mistral-Small-24B-Instruct-2501+estrogen/MS2501-24b-Ink-ep2-adpt
parameters:
select_topk: 0.1
- model: trashpanda-org/MS-24B-Instruct-Mullein-v0
parameters:
select_topk: 0.4
base_model: unsloth/Mistral-Small-24B-Base-2501
merge_method: sce
parameters:
int8_mask: true
rescale: true
normalize: true
dtype: bfloat16
tokenizer_source: baseMerge Detailsyaml
models:
- model: unsloth/Mistral-Small-24B-Base-2501
- model: unsloth/Mistral-Small-24B-Instruct-2501+ToastyPigeon/new-ms-rp-test-ws
parameters:
select_topk:
- value: [0.05, 0.03, 0.02, 0.02, 0.01]
- model: unsloth/Mistral-Small-24B-Instruct-2501+estrogen/MS2501-24b-Ink-ep2-adpt
parameters:
select_topk: 0.1
- model: trashpanda-org/MS-24B-Instruct-Mullein-v0
parameters:
select_topk: 0.4
base_model: unsloth/Mistral-Small-24B-Base-2501
merge_method: sce
parameters:
int8_mask: true
rescale: true
normalize: true
dtype: bfloat16
tokenizer_source: baseMerge Detailsyaml
models:
- model: unsloth/Mistral-Small-24B-Base-2501
- model: unsloth/Mistral-Small-24B-Instruct-2501+ToastyPigeon/new-ms-rp-test-ws
parameters:
select_topk:
- value: [0.05, 0.03, 0.02, 0.02, 0.01]
- model: unsloth/Mistral-Small-24B-Instruct-2501+estrogen/MS2501-24b-Ink-ep2-adpt
parameters:
select_topk: 0.1
- model: trashpanda-org/MS-24B-Instruct-Mullein-v0
parameters:
select_topk: 0.4
base_model: unsloth/Mistral-Small-24B-Base-2501
merge_method: sce
parameters:
int8_mask: true
rescale: true
normalize: true
dtype: bfloat16
tokenizer_source: baseMerge Detailsyaml
models:
- model: unsloth/Mistral-Small-24B-Base-2501
- model: unsloth/Mistral-Small-24B-Instruct-2501+ToastyPigeon/new-ms-rp-test-ws
parameters:
select_topk:
- value: [0.05, 0.03, 0.02, 0.02, 0.01]
- model: unsloth/Mistral-Small-24B-Instruct-2501+estrogen/MS2501-24b-Ink-ep2-adpt
parameters:
select_topk: 0.1
- model: trashpanda-org/MS-24B-Instruct-Mullein-v0
parameters:
select_topk: 0.4
base_model: unsloth/Mistral-Small-24B-Base-2501
merge_method: sce
parameters:
int8_mask: true
rescale: true
normalize: true
dtype: bfloat16
tokenizer_source: baseMerge Detailsyaml
models:
- model: unsloth/Mistral-Small-24B-Base-2501
- model: unsloth/Mistral-Small-24B-Instruct-2501+ToastyPigeon/new-ms-rp-test-ws
parameters:
select_topk:
- value: [0.05, 0.03, 0.02, 0.02, 0.01]
- model: unsloth/Mistral-Small-24B-Instruct-2501+estrogen/MS2501-24b-Ink-ep2-adpt
parameters:
select_topk: 0.1
- model: trashpanda-org/MS-24B-Instruct-Mullein-v0
parameters:
select_topk: 0.4
base_model: unsloth/Mistral-Small-24B-Base-2501
merge_method: sce
parameters:
int8_mask: true
rescale: true
normalize: true
dtype: bfloat16
tokenizer_source: baseMerge Detailsyaml
models:
- model: unsloth/Mistral-Small-24B-Base-2501
- model: unsloth/Mistral-Small-24B-Instruct-2501+ToastyPigeon/new-ms-rp-test-ws
parameters:
select_topk:
- value: [0.05, 0.03, 0.02, 0.02, 0.01]
- model: unsloth/Mistral-Small-24B-Instruct-2501+estrogen/MS2501-24b-Ink-ep2-adpt
parameters:
select_topk: 0.1
- model: trashpanda-org/MS-24B-Instruct-Mullein-v0
parameters:
select_topk: 0.4
base_model: unsloth/Mistral-Small-24B-Base-2501
merge_method: sce
parameters:
int8_mask: true
rescale: true
normalize: true
dtype: bfloat16
tokenizer_source: baseMerge Detailsyaml
models:
- model: unsloth/Mistral-Small-24B-Base-2501
- model: unsloth/Mistral-Small-24B-Instruct-2501+ToastyPigeon/new-ms-rp-test-ws
parameters:
select_topk:
- value: [0.05, 0.03, 0.02, 0.02, 0.01]
- model: unsloth/Mistral-Small-24B-Instruct-2501+estrogen/MS2501-24b-Ink-ep2-adpt
parameters:
select_topk: 0.1
- model: trashpanda-org/MS-24B-Instruct-Mullein-v0
parameters:
select_topk: 0.4
base_model: unsloth/Mistral-Small-24B-Base-2501
merge_method: sce
parameters:
int8_mask: true
rescale: true
normalize: true
dtype: bfloat16
tokenizer_source: baseMerge Detailsyaml
models:
- model: unsloth/Mistral-Small-24B-Base-2501
- model: unsloth/Mistral-Small-24B-Instruct-2501+ToastyPigeon/new-ms-rp-test-ws
parameters:
select_topk:
- value: [0.05, 0.03, 0.02, 0.02, 0.01]
- model: unsloth/Mistral-Small-24B-Instruct-2501+estrogen/MS2501-24b-Ink-ep2-adpt
parameters:
select_topk: 0.1
- model: trashpanda-org/MS-24B-Instruct-Mullein-v0
parameters:
select_topk: 0.4
base_model: unsloth/Mistral-Small-24B-Base-2501
merge_method: sce
parameters:
int8_mask: true
rescale: true
normalize: true
dtype: bfloat16
tokenizer_source: baseMerge Detailsyaml
models:
- model: unsloth/Mistral-Small-24B-Base-2501
- model: unsloth/Mistral-Small-24B-Instruct-2501+ToastyPigeon/new-ms-rp-test-ws
parameters:
select_topk:
- value: [0.05, 0.03, 0.02, 0.02, 0.01]
- model: unsloth/Mistral-Small-24B-Instruct-2501+estrogen/MS2501-24b-Ink-ep2-adpt
parameters:
select_topk: 0.1
- model: trashpanda-org/MS-24B-Instruct-Mullein-v0
parameters:
select_topk: 0.4
base_model: unsloth/Mistral-Small-24B-Base-2501
merge_method: sce
parameters:
int8_mask: true
rescale: true
normalize: true
dtype: bfloat16
tokenizer_source: baseMerge Detailsyaml
models:
- model: unsloth/Mistral-Small-24B-Base-2501
- model: unsloth/Mistral-Small-24B-Instruct-2501+ToastyPigeon/new-ms-rp-test-ws
parameters:
select_topk:
- value: [0.05, 0.03, 0.02, 0.02, 0.01]
- model: unsloth/Mistral-Small-24B-Instruct-2501+estrogen/MS2501-24b-Ink-ep2-adpt
parameters:
select_topk: 0.1
- model: trashpanda-org/MS-24B-Instruct-Mullein-v0
parameters:
select_topk: 0.4
base_model: unsloth/Mistral-Small-24B-Base-2501
merge_method: sce
parameters:
int8_mask: true
rescale: true
normalize: true
dtype: bfloat16
tokenizer_source: baseMerge Detailsyaml
models:
- model: unsloth/Mistral-Small-24B-Base-2501
- model: unsloth/Mistral-Small-24B-Instruct-2501+ToastyPigeon/new-ms-rp-test-ws
parameters:
select_topk:
- value: [0.05, 0.03, 0.02, 0.02, 0.01]
- model: unsloth/Mistral-Small-24B-Instruct-2501+estrogen/MS2501-24b-Ink-ep2-adpt
parameters:
select_topk: 0.1
- model: trashpanda-org/MS-24B-Instruct-Mullein-v0
parameters:
select_topk: 0.4
base_model: unsloth/Mistral-Small-24B-Base-2501
merge_method: sce
parameters:
int8_mask: true
rescale: true
normalize: true
dtype: bfloat16
tokenizer_source: baseMerge Detailsyaml
models:
- model: unsloth/Mistral-Small-24B-Base-2501
- model: unsloth/Mistral-Small-24B-Instruct-2501+ToastyPigeon/new-ms-rp-test-ws
parameters:
select_topk:
- value: [0.05, 0.03, 0.02, 0.02, 0.01]
- model: unsloth/Mistral-Small-24B-Instruct-2501+estrogen/MS2501-24b-Ink-ep2-adpt
parameters:
select_topk: 0.1
- model: trashpanda-org/MS-24B-Instruct-Mullein-v0
parameters:
select_topk: 0.4
base_model: unsloth/Mistral-Small-24B-Base-2501
merge_method: sce
parameters:
int8_mask: true
rescale: true
normalize: true
dtype: bfloat16
tokenizer_source: baseMerge Detailsyaml
models:
- model: unsloth/Mistral-Small-24B-Base-2501
- model: unsloth/Mistral-Small-24B-Instruct-2501+ToastyPigeon/new-ms-rp-test-ws
parameters:
select_topk:
- value: [0.05, 0.03, 0.02, 0.02, 0.01]
- model: unsloth/Mistral-Small-24B-Instruct-2501+estrogen/MS2501-24b-Ink-ep2-adpt
parameters:
select_topk: 0.1
- model: trashpanda-org/MS-24B-Instruct-Mullein-v0
parameters:
select_topk: 0.4
base_model: unsloth/Mistral-Small-24B-Base-2501
merge_method: sce
parameters:
int8_mask: true
rescale: true
normalize: true
dtype: bfloat16
tokenizer_source: baseMerge Detailsyaml
models:
- model: unsloth/Mistral-Small-24B-Base-2501
- model: unsloth/Mistral-Small-24B-Instruct-2501+ToastyPigeon/new-ms-rp-test-ws
parameters:
select_topk:
- value: [0.05, 0.03, 0.02, 0.02, 0.01]
- model: unsloth/Mistral-Small-24B-Instruct-2501+estrogen/MS2501-24b-Ink-ep2-adpt
parameters:
select_topk: 0.1
- model: trashpanda-org/MS-24B-Instruct-Mullein-v0
parameters:
select_topk: 0.4
base_model: unsloth/Mistral-Small-24B-Base-2501
merge_method: sce
parameters:
int8_mask: true
rescale: true
normalize: true
dtype: bfloat16
tokenizer_source: baseMerge Detailsyaml
models:
- model: unsloth/Mistral-Small-24B-Base-2501
- model: unsloth/Mistral-Small-24B-Instruct-2501+ToastyPigeon/new-ms-rp-test-ws
parameters:
select_topk:
- value: [0.05, 0.03, 0.02, 0.02, 0.01]
- model: unsloth/Mistral-Small-24B-Instruct-2501+estrogen/MS2501-24b-Ink-ep2-adpt
parameters:
select_topk: 0.1
- model: trashpanda-org/MS-24B-Instruct-Mullein-v0
parameters:
select_topk: 0.4
base_model: unsloth/Mistral-Small-24B-Base-2501
merge_method: sce
parameters:
int8_mask: true
rescale: true
normalize: true
dtype: bfloat16
tokenizer_source: baseMerge Detailsyaml
models:
- model: unsloth/Mistral-Small-24B-Base-2501
- model: unsloth/Mistral-Small-24B-Instruct-2501+ToastyPigeon/new-ms-rp-test-ws
parameters:
select_topk:
- value: [0.05, 0.03, 0.02, 0.02, 0.01]
- model: unsloth/Mistral-Small-24B-Instruct-2501+estrogen/MS2501-24b-Ink-ep2-adpt
parameters:
select_topk: 0.1
- model: trashpanda-org/MS-24B-Instruct-Mullein-v0
parameters:
select_topk: 0.4
base_model: unsloth/Mistral-Small-24B-Base-2501
merge_method: sce
parameters:
int8_mask: true
rescale: true
normalize: true
dtype: bfloat16
tokenizer_source: baseMerge Detailsyaml
models:
- model: unsloth/Mistral-Small-24B-Base-2501
- model: unsloth/Mistral-Small-24B-Instruct-2501+ToastyPigeon/new-ms-rp-test-ws
parameters:
select_topk:
- value: [0.05, 0.03, 0.02, 0.02, 0.01]
- model: unsloth/Mistral-Small-24B-Instruct-2501+estrogen/MS2501-24b-Ink-ep2-adpt
parameters:
select_topk: 0.1
- model: trashpanda-org/MS-24B-Instruct-Mullein-v0
parameters:
select_topk: 0.4
base_model: unsloth/Mistral-Small-24B-Base-2501
merge_method: sce
parameters:
int8_mask: true
rescale: true
normalize: true
dtype: bfloat16
tokenizer_source: baseMerge Detailsyaml
models:
- model: unsloth/Mistral-Small-24B-Base-2501
- model: unsloth/Mistral-Small-24B-Instruct-2501+ToastyPigeon/new-ms-rp-test-ws
parameters:
select_topk:
- value: [0.05, 0.03, 0.02, 0.02, 0.01]
- model: unsloth/Mistral-Small-24B-Instruct-2501+estrogen/MS2501-24b-Ink-ep2-adpt
parameters:
select_topk: 0.1
- model: trashpanda-org/MS-24B-Instruct-Mullein-v0
parameters:
select_topk: 0.4
base_model: unsloth/Mistral-Small-24B-Base-2501
merge_method: sce
parameters:
int8_mask: true
rescale: true
normalize: true
dtype: bfloat16
tokenizer_source: baseMerge Detailsyaml
models:
- model: unsloth/Mistral-Small-24B-Base-2501
- model: unsloth/Mistral-Small-24B-Instruct-2501+ToastyPigeon/new-ms-rp-test-ws
parameters:
select_topk:
- value: [0.05, 0.03, 0.02, 0.02, 0.01]
- model: unsloth/Mistral-Small-24B-Instruct-2501+estrogen/MS2501-24b-Ink-ep2-adpt
parameters:
select_topk: 0.1
- model: trashpanda-org/MS-24B-Instruct-Mullein-v0
parameters:
select_topk: 0.4
base_model: unsloth/Mistral-Small-24B-Base-2501
merge_method: sce
parameters:
int8_mask: true
rescale: true
normalize: true
dtype: bfloat16
tokenizer_source: baseMerge Detailsyaml
models:
- model: unsloth/Mistral-Small-24B-Base-2501
- model: unsloth/Mistral-Small-24B-Instruct-2501+ToastyPigeon/new-ms-rp-test-ws
parameters:
select_topk:
- value: [0.05, 0.03, 0.02, 0.02, 0.01]
- model: unsloth/Mistral-Small-24B-Instruct-2501+estrogen/MS2501-24b-Ink-ep2-adpt
parameters:
select_topk: 0.1
- model: trashpanda-org/MS-24B-Instruct-Mullein-v0
parameters:
select_topk: 0.4
base_model: unsloth/Mistral-Small-24B-Base-2501
merge_method: sce
parameters:
int8_mask: true
rescale: true
normalize: true
dtype: bfloat16
tokenizer_source: baseMerge Detailsyaml
models:
- model: unsloth/Mistral-Small-24B-Base-2501
- model: unsloth/Mistral-Small-24B-Instruct-2501+ToastyPigeon/new-ms-rp-test-ws
parameters:
select_topk:
- value: [0.05, 0.03, 0.02, 0.02, 0.01]
- model: unsloth/Mistral-Small-24B-Instruct-2501+estrogen/MS2501-24b-Ink-ep2-adpt
parameters:
select_topk: 0.1
- model: trashpanda-org/MS-24B-Instruct-Mullein-v0
parameters:
select_topk: 0.4
base_model: unsloth/Mistral-Small-24B-Base-2501
merge_method: sce
parameters:
int8_mask: true
rescale: true
normalize: true
dtype: bfloat16
tokenizer_source: baseMerge Detailsyaml
models:
- model: unsloth/Mistral-Small-24B-Base-2501
- model: unsloth/Mistral-Small-24B-Instruct-2501+ToastyPigeon/new-ms-rp-test-ws
parameters:
select_topk:
- value: [0.05, 0.03, 0.02, 0.02, 0.01]
- model: unsloth/Mistral-Small-24B-Instruct-2501+estrogen/MS2501-24b-Ink-ep2-adpt
parameters:
select_topk: 0.1
- model: trashpanda-org/MS-24B-Instruct-Mullein-v0
parameters:
select_topk: 0.4
base_model: unsloth/Mistral-Small-24B-Base-2501
merge_method: sce
parameters:
int8_mask: true
rescale: true
normalize: true
dtype: bfloat16
tokenizer_source: baseMerge Detailsyaml
models:
- model: unsloth/Mistral-Small-24B-Base-2501
- model: unsloth/Mistral-Small-24B-Instruct-2501+ToastyPigeon/new-ms-rp-test-ws
parameters:
select_topk:
- value: [0.05, 0.03, 0.02, 0.02, 0.01]
- model: unsloth/Mistral-Small-24B-Instruct-2501+estrogen/MS2501-24b-Ink-ep2-adpt
parameters:
select_topk: 0.1
- model: trashpanda-org/MS-24B-Instruct-Mullein-v0
parameters:
select_topk: 0.4
base_model: unsloth/Mistral-Small-24B-Base-2501
merge_method: sce
parameters:
int8_mask: true
rescale: true
normalize: true
dtype: bfloat16
tokenizer_source: baseMerge Detailsyaml
models:
- model: unsloth/Mistral-Small-24B-Base-2501
- model: unsloth/Mistral-Small-24B-Instruct-2501+ToastyPigeon/new-ms-rp-test-ws
parameters:
select_topk:
- value: [0.05, 0.03, 0.02, 0.02, 0.01]
- model: unsloth/Mistral-Small-24B-Instruct-2501+estrogen/MS2501-24b-Ink-ep2-adpt
parameters:
select_topk: 0.1
- model: trashpanda-org/MS-24B-Instruct-Mullein-v0
parameters:
select_topk: 0.4
base_model: unsloth/Mistral-Small-24B-Base-2501
merge_method: sce
parameters:
int8_mask: true
rescale: true
normalize: true
dtype: bfloat16
tokenizer_source: baseMerge Detailsyaml
models:
- model: unsloth/Mistral-Small-24B-Base-2501
- model: unsloth/Mistral-Small-24B-Instruct-2501+ToastyPigeon/new-ms-rp-test-ws
parameters:
select_topk:
- value: [0.05, 0.03, 0.02, 0.02, 0.01]
- model: unsloth/Mistral-Small-24B-Instruct-2501+estrogen/MS2501-24b-Ink-ep2-adpt
parameters:
select_topk: 0.1
- model: trashpanda-org/MS-24B-Instruct-Mullein-v0
parameters:
select_topk: 0.4
base_model: unsloth/Mistral-Small-24B-Base-2501
merge_method: sce
parameters:
int8_mask: true
rescale: true
normalize: true
dtype: bfloat16
tokenizer_source: base
Step2 (yaml)
dtype: bfloat16
tokenizer_source: base
merge_method: della_linear
parameters:
density: 0.55
base_model: Step1
models:
- model: unsloth/Mistral-Small-24B-Instruct-2501
parameters:
weight:
- filter: v_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: o_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1]
- filter: up_proj
value: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
- filter: gate_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: down_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
- value: 0
- model: Step1
parameters:
weight:
- filter: v_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: o_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0]
- filter: up_proj
value: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
- filter: gate_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: down_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
- value: 1
Step2 (yaml)
dtype: bfloat16
tokenizer_source: base
merge_method: della_linear
parameters:
density: 0.55
base_model: Step1
models:
- model: unsloth/Mistral-Small-24B-Instruct-2501
parameters:
weight:
- filter: v_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: o_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1]
- filter: up_proj
value: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
- filter: gate_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: down_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
- value: 0
- model: Step1
parameters:
weight:
- filter: v_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: o_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0]
- filter: up_proj
value: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
- filter: gate_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: down_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
- value: 1Step2yaml
dtype: bfloat16
tokenizer_source: base
merge_method: della_linear
parameters:
density: 0.55
base_model: Step1
models:
- model: unsloth/Mistral-Small-24B-Instruct-2501
parameters:
weight:
- filter: v_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: o_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1]
- filter: up_proj
value: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
- filter: gate_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: down_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
- value: 0
- model: Step1
parameters:
weight:
- filter: v_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: o_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0]
- filter: up_proj
value: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
- filter: gate_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: down_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
- value: 1Step2yaml
dtype: bfloat16
tokenizer_source: base
merge_method: della_linear
parameters:
density: 0.55
base_model: Step1
models:
- model: unsloth/Mistral-Small-24B-Instruct-2501
parameters:
weight:
- filter: v_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: o_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1]
- filter: up_proj
value: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
- filter: gate_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: down_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
- value: 0
- model: Step1
parameters:
weight:
- filter: v_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: o_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0]
- filter: up_proj
value: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
- filter: gate_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: down_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
- value: 1Step2yaml
dtype: bfloat16
tokenizer_source: base
merge_method: della_linear
parameters:
density: 0.55
base_model: Step1
models:
- model: unsloth/Mistral-Small-24B-Instruct-2501
parameters:
weight:
- filter: v_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: o_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1]
- filter: up_proj
value: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
- filter: gate_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: down_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
- value: 0
- model: Step1
parameters:
weight:
- filter: v_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: o_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0]
- filter: up_proj
value: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
- filter: gate_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: down_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
- value: 1Step2yaml
dtype: bfloat16
tokenizer_source: base
merge_method: della_linear
parameters:
density: 0.55
base_model: Step1
models:
- model: unsloth/Mistral-Small-24B-Instruct-2501
parameters:
weight:
- filter: v_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: o_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1]
- filter: up_proj
value: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
- filter: gate_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: down_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
- value: 0
- model: Step1
parameters:
weight:
- filter: v_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: o_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0]
- filter: up_proj
value: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
- filter: gate_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: down_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
- value: 1Step2yaml
dtype: bfloat16
tokenizer_source: base
merge_method: della_linear
parameters:
density: 0.55
base_model: Step1
models:
- model: unsloth/Mistral-Small-24B-Instruct-2501
parameters:
weight:
- filter: v_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: o_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1]
- filter: up_proj
value: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
- filter: gate_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: down_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
- value: 0
- model: Step1
parameters:
weight:
- filter: v_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: o_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0]
- filter: up_proj
value: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
- filter: gate_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: down_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
- value: 1Step2yaml
dtype: bfloat16
tokenizer_source: base
merge_method: della_linear
parameters:
density: 0.55
base_model: Step1
models:
- model: unsloth/Mistral-Small-24B-Instruct-2501
parameters:
weight:
- filter: v_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: o_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1]
- filter: up_proj
value: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
- filter: gate_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: down_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
- value: 0
- model: Step1
parameters:
weight:
- filter: v_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: o_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0]
- filter: up_proj
value: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
- filter: gate_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: down_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
- value: 1Step2yaml
dtype: bfloat16
tokenizer_source: base
merge_method: della_linear
parameters:
density: 0.55
base_model: Step1
models:
- model: unsloth/Mistral-Small-24B-Instruct-2501
parameters:
weight:
- filter: v_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: o_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1]
- filter: up_proj
value: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
- filter: gate_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: down_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
- value: 0
- model: Step1
parameters:
weight:
- filter: v_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: o_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0]
- filter: up_proj
value: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
- filter: gate_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: down_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
- value: 1Step2yaml
dtype: bfloat16
tokenizer_source: base
merge_method: della_linear
parameters:
density: 0.55
base_model: Step1
models:
- model: unsloth/Mistral-Small-24B-Instruct-2501
parameters:
weight:
- filter: v_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: o_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1]
- filter: up_proj
value: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
- filter: gate_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: down_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
- value: 0
- model: Step1
parameters:
weight:
- filter: v_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: o_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0]
- filter: up_proj
value: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
- filter: gate_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: down_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
- value: 1Step2yaml
dtype: bfloat16
tokenizer_source: base
merge_method: della_linear
parameters:
density: 0.55
base_model: Step1
models:
- model: unsloth/Mistral-Small-24B-Instruct-2501
parameters:
weight:
- filter: v_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: o_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1]
- filter: up_proj
value: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
- filter: gate_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: down_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
- value: 0
- model: Step1
parameters:
weight:
- filter: v_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: o_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0]
- filter: up_proj
value: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
- filter: gate_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: down_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
- value: 1Step2yaml
dtype: bfloat16
tokenizer_source: base
merge_method: della_linear
parameters:
density: 0.55
base_model: Step1
models:
- model: unsloth/Mistral-Small-24B-Instruct-2501
parameters:
weight:
- filter: v_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: o_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1]
- filter: up_proj
value: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
- filter: gate_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: down_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
- value: 0
- model: Step1
parameters:
weight:
- filter: v_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: o_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0]
- filter: up_proj
value: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
- filter: gate_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: down_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
- value: 1Step2yaml
dtype: bfloat16
tokenizer_source: base
merge_method: della_linear
parameters:
density: 0.55
base_model: Step1
models:
- model: unsloth/Mistral-Small-24B-Instruct-2501
parameters:
weight:
- filter: v_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: o_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1]
- filter: up_proj
value: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
- filter: gate_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: down_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
- value: 0
- model: Step1
parameters:
weight:
- filter: v_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: o_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0]
- filter: up_proj
value: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
- filter: gate_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: down_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
- value: 1Step2yaml
dtype: bfloat16
tokenizer_source: base
merge_method: della_linear
parameters:
density: 0.55
base_model: Step1
models:
- model: unsloth/Mistral-Small-24B-Instruct-2501
parameters:
weight:
- filter: v_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: o_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1]
- filter: up_proj
value: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
- filter: gate_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: down_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
- value: 0
- model: Step1
parameters:
weight:
- filter: v_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: o_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0]
- filter: up_proj
value: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
- filter: gate_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: down_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
- value: 1Step2yaml
dtype: bfloat16
tokenizer_source: base
merge_method: della_linear
parameters:
density: 0.55
base_model: Step1
models:
- model: unsloth/Mistral-Small-24B-Instruct-2501
parameters:
weight:
- filter: v_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: o_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1]
- filter: up_proj
value: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
- filter: gate_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: down_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
- value: 0
- model: Step1
parameters:
weight:
- filter: v_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: o_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0]
- filter: up_proj
value: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
- filter: gate_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: down_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
- value: 1Step2yaml
dtype: bfloat16
tokenizer_source: base
merge_method: della_linear
parameters:
density: 0.55
base_model: Step1
models:
- model: unsloth/Mistral-Small-24B-Instruct-2501
parameters:
weight:
- filter: v_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: o_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1]
- filter: up_proj
value: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
- filter: gate_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: down_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
- value: 0
- model: Step1
parameters:
weight:
- filter: v_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: o_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0]
- filter: up_proj
value: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
- filter: gate_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: down_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
- value: 1
Step2
yaml
dtype: bfloat16
tokenizer_source: base
merge_method: della_linear
parameters:
density: 0.55
base_model: Step1
models:
- model: unsloth/Mistral-Small-24B-Instruct-2501
parameters:
weight:
- filter: v_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: o_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1]
- filter: up_proj
value: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
- filter: gate_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: down_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
- value: 0
- model: Step1
parameters:
weight:
- filter: v_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: o_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0]
- filter: up_proj
value: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
- filter: gate_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: down_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
- value: 1
Step2
yaml
dtype: bfloat16
tokenizer_source: base
merge_method: della_linear
parameters:
density: 0.55
base_model: Step1
models:
- model: unsloth/Mistral-Small-24B-Instruct-2501
parameters:
weight:
- filter: v_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: o_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1]
- filter: up_proj
value: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
- filter: gate_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: down_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
- value: 0
- model: Step1
parameters:
weight:
- filter: v_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: o_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0]
- filter: up_proj
value: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
- filter: gate_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: down_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
- value: 1
Step2
yaml
dtype: bfloat16
tokenizer_source: base
merge_method: della_linear
parameters:
density: 0.55
base_model: Step1
models:
- model: unsloth/Mistral-Small-24B-Instruct-2501
parameters:
weight:
- filter: v_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: o_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1]
- filter: up_proj
value: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
- filter: gate_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: down_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
- value: 0
- model: Step1
parameters:
weight:
- filter: v_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: o_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0]
- filter: up_proj
value: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
- filter: gate_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: down_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
- value: 1
Step2
yaml
dtype: bfloat16
tokenizer_source: base
merge_method: della_linear
parameters:
density: 0.55
base_model: Step1
models:
- model: unsloth/Mistral-Small-24B-Instruct-2501
parameters:
weight:
- filter: v_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: o_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1]
- filter: up_proj
value: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
- filter: gate_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: down_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
- value: 0
- model: Step1
parameters:
weight:
- filter: v_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: o_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0]
- filter: up_proj
value: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
- filter: gate_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: down_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
- value: 1
Step2
yaml
dtype: bfloat16
tokenizer_source: base
merge_method: della_linear
parameters:
density: 0.55
base_model: Step1
models:
- model: unsloth/Mistral-Small-24B-Instruct-2501
parameters:
weight:
- filter: v_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: o_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1]
- filter: up_proj
value: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
- filter: gate_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: down_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
- value: 0
- model: Step1
parameters:
weight:
- filter: v_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: o_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0]
- filter: up_proj
value: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
- filter: gate_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: down_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
- value: 1
Step2
yaml
dtype: bfloat16
tokenizer_source: base
merge_method: della_linear
parameters:
density: 0.55
base_model: Step1
models:
- model: unsloth/Mistral-Small-24B-Instruct-2501
parameters:
weight:
- filter: v_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: o_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1]
- filter: up_proj
value: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
- filter: gate_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: down_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
- value: 0
- model: Step1
parameters:
weight:
- filter: v_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: o_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0]
- filter: up_proj
value: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
- filter: gate_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: down_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
- value: 1
Step2
yaml
dtype: bfloat16
tokenizer_source: base
merge_method: della_linear
parameters:
density: 0.55
base_model: Step1
models:
- model: unsloth/Mistral-Small-24B-Instruct-2501
parameters:
weight:
- filter: v_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: o_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1]
- filter: up_proj
value: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
- filter: gate_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: down_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
- value: 0
- model: Step1
parameters:
weight:
- filter: v_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: o_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0]
- filter: up_proj
value: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
- filter: gate_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: down_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
- value: 1
Step2
yaml
dtype: bfloat16
tokenizer_source: base
merge_method: della_linear
parameters:
density: 0.55
base_model: Step1
models:
- model: unsloth/Mistral-Small-24B-Instruct-2501
parameters:
weight:
- filter: v_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: o_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1]
- filter: up_proj
value: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
- filter: gate_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: down_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
- value: 0
- model: Step1
parameters:
weight:
- filter: v_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: o_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0]
- filter: up_proj
value: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
- filter: gate_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: down_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
- value: 1
Step2
yaml
dtype: bfloat16
tokenizer_source: base
merge_method: della_linear
parameters:
density: 0.55
base_model: Step1
models:
- model: unsloth/Mistral-Small-24B-Instruct-2501
parameters:
weight:
- filter: v_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: o_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1]
- filter: up_proj
value: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
- filter: gate_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: down_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
- value: 0
- model: Step1
parameters:
weight:
- filter: v_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: o_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0]
- filter: up_proj
value: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
- filter: gate_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: down_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
- value: 1
Step2
yaml
dtype: bfloat16
tokenizer_source: base
merge_method: della_linear
parameters:
density: 0.55
base_model: Step1
models:
- model: unsloth/Mistral-Small-24B-Instruct-2501
parameters:
weight:
- filter: v_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: o_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1]
- filter: up_proj
value: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
- filter: gate_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: down_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
- value: 0
- model: Step1
parameters:
weight:
- filter: v_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: o_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0]
- filter: up_proj
value: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
- filter: gate_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: down_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
- value: 1
Step2
yaml
dtype: bfloat16
tokenizer_source: base
merge_method: della_linear
parameters:
density: 0.55
base_model: Step1
models:
- model: unsloth/Mistral-Small-24B-Instruct-2501
parameters:
weight:
- filter: v_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: o_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1]
- filter: up_proj
value: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
- filter: gate_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: down_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
- value: 0
- model: Step1
parameters:
weight:
- filter: v_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: o_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0]
- filter: up_proj
value: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
- filter: gate_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: down_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
- value: 1
Step2
yaml
dtype: bfloat16
tokenizer_source: base
merge_method: della_linear
parameters:
density: 0.55
base_model: Step1
models:
- model: unsloth/Mistral-Small-24B-Instruct-2501
parameters:
weight:
- filter: v_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: o_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1]
- filter: up_proj
value: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
- filter: gate_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: down_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
- value: 0
- model: Step1
parameters:
weight:
- filter: v_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: o_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0]
- filter: up_proj
value: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
- filter: gate_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: down_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
- value: 1
Step2
yaml
dtype: bfloat16
tokenizer_source: base
merge_method: della_linear
parameters:
density: 0.55
base_model: Step1
models:
- model: unsloth/Mistral-Small-24B-Instruct-2501
parameters:
weight:
- filter: v_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: o_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1]
- filter: up_proj
value: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
- filter: gate_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: down_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
- value: 0
- model: Step1
parameters:
weight:
- filter: v_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: o_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0]
- filter: up_proj
value: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
- filter: gate_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: down_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
- value: 1
Step2
yaml
dtype: bfloat16
tokenizer_source: base
merge_method: della_linear
parameters:
density: 0.55
base_model: Step1
models:
- model: unsloth/Mistral-Small-24B-Instruct-2501
parameters:
weight:
- filter: v_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: o_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1]
- filter: up_proj
value: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
- filter: gate_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: down_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
- value: 0
- model: Step1
parameters:
weight:
- filter: v_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: o_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0]
- filter: up_proj
value: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
- filter: gate_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: down_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
- value: 1
Step2
yaml
dtype: bfloat16
tokenizer_source: base
merge_method: della_linear
parameters:
density: 0.55
base_model: Step1
models:
- model: unsloth/Mistral-Small-24B-Instruct-2501
parameters:
weight:
- filter: v_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: o_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1]
- filter: up_proj
value: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
- filter: gate_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: down_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
- value: 0
- model: Step1
parameters:
weight:
- filter: v_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: o_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0]
- filter: up_proj
value: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
- filter: gate_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: down_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
- value: 1
Step2
yaml
dtype: bfloat16
tokenizer_source: base
merge_method: della_linear
parameters:
density: 0.55
base_model: Step1
models:
- model: unsloth/Mistral-Small-24B-Instruct-2501
parameters:
weight:
- filter: v_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: o_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1]
- filter: up_proj
value: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
- filter: gate_proj
value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
- filter: down_proj
value: [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
- value: 0
- model: Step1
parameters:
weight:
- filter: v_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: o_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0]
- filter: up_proj
value: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
- filter: gate_proj
value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
- filter: down_proj
value: [0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
- value: 1
Deploy This Model
Production-ready deployment in minutes
Together.ai
Instant API access to this model
Production-ready inference API. Start free, scale to millions.
Try Free API
Replicate
One-click model deployment
Run models in the cloud with simple API. No DevOps required.
Deploy Now
Disclosure: We may earn a commission from these partners. This helps keep LLMYourWay free.