inference-optimization
53 models • 0 total models in database
Sort by:
Qwen3-Coder-Next.w4a16
license:apache-2.0
1,543
0
Qwen3-Coder-Next-NVFP4
license:apache-2.0
154
0
Qwen3-Coder-Next.w8a8
license:apache-2.0
144
0
Ministral-3-14B-Instruct-2512-FP8-dynamic
NaNK
license:apache-2.0
121
0
Qwen3-Next-80B-A3B-Instruct-FP8-dynamic
NaNK
license:apache-2.0
80
0
Qwen3-Next-80B-A3B-Instruct-FP8-block
NaNK
license:apache-2.0
76
0
Qwen3-Next-80B-A3B-Thinking-FP8-dynamic
NaNK
license:apache-2.0
72
0
Qwen3-Coder-Next-FP8-dynamic
license:apache-2.0
61
0
NVIDIA-Nemotron-3-Nano-30B-A3B-NVFP4
NaNK
license:apache-2.0
55
0
Llama-3.1-8B-Instruct-QKV-Cache-FP8-Per-Head
NaNK
llama
52
0
Qwen3-4B-Thinking-2507.w4a16
NaNK
license:apache-2.0
51
0
Qwen3-4B-Thinking-2507.w8a8
NaNK
license:apache-2.0
48
0
Qwen3-Next-80B-A3B-Instruct-quantized.w4a16
NaNK
license:apache-2.0
46
0
Qwen3-Next-80B-A3B-Instruct_mtp_speculator
NaNK
license:apache-2.0
40
0
Qwen3-Next-80B-A3B-Thinking-FP8-block
NaNK
license:apache-2.0
40
0
NVIDIA-Nemotron-3-Nano-30B-A3B-FP8-dynamic
NaNK
license:apache-2.0
35
0
Qwen3-Next-80B-A3B-Instruct-NVFP4
NaNK
license:apache-2.0
31
0
Qwen3-30B-A3B-Instruct-2507.w4a16
NaNK
license:apache-2.0
31
0
sarvam-30b-NVFP4
NaNK
—
26
1
GLM-4.6-quantized.w4a16
NaNK
—
26
0
Ministral-3-14B-Instruct-2512.w4a16
NaNK
—
25
0
Qwen3-4B-Instruct-2507.w8a8
NaNK
license:apache-2.0
22
0
Qwen3-4B-Instruct-2507.w4a16
NaNK
license:apache-2.0
22
0
Qwen3-Next-80B-A3B-Thinking-NVFP4
NaNK
license:apache-2.0
22
0
Llama-3.1-8B-Instruct-FP8-dynamic-QKV-Cache-FP8-Per-Head
NaNK
llama
22
0
granite-4.0-h-tiny-quantized.w8a8
license:apache-2.0
19
0
MiniMax-M2.5-NVFP4
—
15
0
gpt-oss-120b-ckpt4-speculator.eagle3
NaNK
—
15
0
Qwen3-30B-A3B-Thinking-2507.w4a16
NaNK
license:apache-2.0
14
0
sarvam-105b-FP8-Dynamic
NaNK
—
14
0
sarvam-30b-FP8-Dynamic
NaNK
—
13
1
gpt-oss-120b-from-self-ckpt1-speculator.eagle3
NaNK
—
12
0
gpt-oss-120b-from-self-ckpt5-speculator.eagle3
NaNK
—
12
0
gpt-oss-120b-from-self-ckpt3-speculator.eagle3
NaNK
—
11
0
gpt-oss-120b-from-self-ckpt2-speculator.eagle3
NaNK
—
11
0
gpt-oss-120b-from-self-ckpt0-speculator.eagle3
NaNK
—
11
0
gpt-oss-120b-from-self-ckpt4-speculator.eagle3
NaNK
—
11
0
Qwen3-30B-A3B-Thinking-2507.w8a8
NaNK
license:apache-2.0
11
0
Mistral-Small-4-119B-2603-BF16
NaNK
license:apache-2.0
11
0
Mistral3_speculator_dummy
—
11
0
GLM-4.6-NVFP4
NaNK
—
9
0
Qwen3-30B-A3B-Instruct-2507.w8a8
NaNK
license:apache-2.0
8
0
Qwen3-30B-A3B-Instruct-2507-6bits
NaNK
—
8
0
Ministral-3-14B-Instruct-2512-FP8
NaNK
—
8
0
gpt-oss-120b-ckpt3-speculator.eagle3
NaNK
—
7
0
GLM-4.6-quantized.w8a8
NaNK
—
7
0
test_qwen3_next_mtp
—
6
0
test_tencentbac_fastmtp
—
5
0
Qwen3-Next-80B-A3B-Thinking-quantized.w4a16
NaNK
license:apache-2.0
5
0
Qwen3-32B-Thinking-speculator.eagle3
NaNK
license:apache-2.0
4
0
GLM-4.6-FP8-dynamic
NaNK
—
1
0
Qwen3-30B-A3B_5.5_bits_mode_noise
NaNK
—
0
1
gpt-oss-120b-from-qwen235b-then-self-ckpt4-speculator.eagle3
NaNK
—
0
1