inference-optimization

53 models • 0 total models in database
Sort by:

Qwen3-Coder-Next.w4a16

license:apache-2.0
1,543
0

Qwen3-Coder-Next-NVFP4

license:apache-2.0
154
0

Qwen3-Coder-Next.w8a8

license:apache-2.0
144
0

Ministral-3-14B-Instruct-2512-FP8-dynamic

NaNK
license:apache-2.0
121
0

Qwen3-Next-80B-A3B-Instruct-FP8-dynamic

NaNK
license:apache-2.0
80
0

Qwen3-Next-80B-A3B-Instruct-FP8-block

NaNK
license:apache-2.0
76
0

Qwen3-Next-80B-A3B-Thinking-FP8-dynamic

NaNK
license:apache-2.0
72
0

Qwen3-Coder-Next-FP8-dynamic

license:apache-2.0
61
0

NVIDIA-Nemotron-3-Nano-30B-A3B-NVFP4

NaNK
license:apache-2.0
55
0

Llama-3.1-8B-Instruct-QKV-Cache-FP8-Per-Head

NaNK
llama
52
0

Qwen3-4B-Thinking-2507.w4a16

NaNK
license:apache-2.0
51
0

Qwen3-4B-Thinking-2507.w8a8

NaNK
license:apache-2.0
48
0

Qwen3-Next-80B-A3B-Instruct-quantized.w4a16

NaNK
license:apache-2.0
46
0

Qwen3-Next-80B-A3B-Instruct_mtp_speculator

NaNK
license:apache-2.0
40
0

Qwen3-Next-80B-A3B-Thinking-FP8-block

NaNK
license:apache-2.0
40
0

NVIDIA-Nemotron-3-Nano-30B-A3B-FP8-dynamic

NaNK
license:apache-2.0
35
0

Qwen3-Next-80B-A3B-Instruct-NVFP4

NaNK
license:apache-2.0
31
0

Qwen3-30B-A3B-Instruct-2507.w4a16

NaNK
license:apache-2.0
31
0

sarvam-30b-NVFP4

NaNK
26
1

GLM-4.6-quantized.w4a16

NaNK
26
0

Ministral-3-14B-Instruct-2512.w4a16

NaNK
25
0

Qwen3-4B-Instruct-2507.w8a8

NaNK
license:apache-2.0
22
0

Qwen3-4B-Instruct-2507.w4a16

NaNK
license:apache-2.0
22
0

Qwen3-Next-80B-A3B-Thinking-NVFP4

NaNK
license:apache-2.0
22
0

Llama-3.1-8B-Instruct-FP8-dynamic-QKV-Cache-FP8-Per-Head

NaNK
llama
22
0

granite-4.0-h-tiny-quantized.w8a8

license:apache-2.0
19
0

MiniMax-M2.5-NVFP4

15
0

gpt-oss-120b-ckpt4-speculator.eagle3

NaNK
15
0

Qwen3-30B-A3B-Thinking-2507.w4a16

NaNK
license:apache-2.0
14
0

sarvam-105b-FP8-Dynamic

NaNK
14
0

sarvam-30b-FP8-Dynamic

NaNK
13
1

gpt-oss-120b-from-self-ckpt1-speculator.eagle3

NaNK
12
0

gpt-oss-120b-from-self-ckpt5-speculator.eagle3

NaNK
12
0

gpt-oss-120b-from-self-ckpt3-speculator.eagle3

NaNK
11
0

gpt-oss-120b-from-self-ckpt2-speculator.eagle3

NaNK
11
0

gpt-oss-120b-from-self-ckpt0-speculator.eagle3

NaNK
11
0

gpt-oss-120b-from-self-ckpt4-speculator.eagle3

NaNK
11
0

Qwen3-30B-A3B-Thinking-2507.w8a8

NaNK
license:apache-2.0
11
0

Mistral-Small-4-119B-2603-BF16

NaNK
license:apache-2.0
11
0

Mistral3_speculator_dummy

11
0

GLM-4.6-NVFP4

NaNK
9
0

Qwen3-30B-A3B-Instruct-2507.w8a8

NaNK
license:apache-2.0
8
0

Qwen3-30B-A3B-Instruct-2507-6bits

NaNK
8
0

Ministral-3-14B-Instruct-2512-FP8

NaNK
8
0

gpt-oss-120b-ckpt3-speculator.eagle3

NaNK
7
0

GLM-4.6-quantized.w8a8

NaNK
7
0

test_qwen3_next_mtp

6
0

test_tencentbac_fastmtp

5
0

Qwen3-Next-80B-A3B-Thinking-quantized.w4a16

NaNK
license:apache-2.0
5
0

Qwen3-32B-Thinking-speculator.eagle3

NaNK
license:apache-2.0
4
0

GLM-4.6-FP8-dynamic

NaNK
1
0

Qwen3-30B-A3B_5.5_bits_mode_noise

NaNK
0
1

gpt-oss-120b-from-qwen235b-then-self-ckpt4-speculator.eagle3

NaNK
0
1