inference-optimization

53 models • 0 total models in database

Sort by:

Qwen3-Coder-Next.w4a16

license:apache-2.0

Qwen3-Coder-Next-NVFP4

license:apache-2.0

Qwen3-Coder-Next.w8a8

license:apache-2.0

Ministral-3-14B-Instruct-2512-FP8-dynamic

license:apache-2.0

Qwen3-Next-80B-A3B-Instruct-FP8-dynamic

license:apache-2.0

Qwen3-Next-80B-A3B-Instruct-FP8-block

license:apache-2.0

Qwen3-Next-80B-A3B-Thinking-FP8-dynamic

license:apache-2.0

Qwen3-Coder-Next-FP8-dynamic

license:apache-2.0

NVIDIA-Nemotron-3-Nano-30B-A3B-NVFP4

license:apache-2.0

Llama-3.1-8B-Instruct-QKV-Cache-FP8-Per-Head

Qwen3-4B-Thinking-2507.w4a16

license:apache-2.0

Qwen3-4B-Thinking-2507.w8a8

license:apache-2.0

Qwen3-Next-80B-A3B-Instruct-quantized.w4a16

license:apache-2.0

Qwen3-Next-80B-A3B-Instruct_mtp_speculator

license:apache-2.0

Qwen3-Next-80B-A3B-Thinking-FP8-block

license:apache-2.0

NVIDIA-Nemotron-3-Nano-30B-A3B-FP8-dynamic

license:apache-2.0

Qwen3-Next-80B-A3B-Instruct-NVFP4

license:apache-2.0

Qwen3-30B-A3B-Instruct-2507.w4a16

license:apache-2.0

sarvam-30b-NVFP4

GLM-4.6-quantized.w4a16

Ministral-3-14B-Instruct-2512.w4a16

Qwen3-4B-Instruct-2507.w8a8

license:apache-2.0

Qwen3-4B-Instruct-2507.w4a16

license:apache-2.0

Qwen3-Next-80B-A3B-Thinking-NVFP4

license:apache-2.0

Llama-3.1-8B-Instruct-FP8-dynamic-QKV-Cache-FP8-Per-Head

granite-4.0-h-tiny-quantized.w8a8

license:apache-2.0

MiniMax-M2.5-NVFP4

gpt-oss-120b-ckpt4-speculator.eagle3

Qwen3-30B-A3B-Thinking-2507.w4a16

license:apache-2.0

sarvam-105b-FP8-Dynamic

sarvam-30b-FP8-Dynamic

gpt-oss-120b-from-self-ckpt1-speculator.eagle3

gpt-oss-120b-from-self-ckpt5-speculator.eagle3

gpt-oss-120b-from-self-ckpt3-speculator.eagle3

gpt-oss-120b-from-self-ckpt2-speculator.eagle3

gpt-oss-120b-from-self-ckpt0-speculator.eagle3

gpt-oss-120b-from-self-ckpt4-speculator.eagle3

Qwen3-30B-A3B-Thinking-2507.w8a8

license:apache-2.0

Mistral-Small-4-119B-2603-BF16

license:apache-2.0

Mistral3_speculator_dummy

GLM-4.6-NVFP4

Qwen3-30B-A3B-Instruct-2507.w8a8

license:apache-2.0

Qwen3-30B-A3B-Instruct-2507-6bits

Ministral-3-14B-Instruct-2512-FP8

gpt-oss-120b-ckpt3-speculator.eagle3

GLM-4.6-quantized.w8a8

test_qwen3_next_mtp

test_tencentbac_fastmtp

Qwen3-Next-80B-A3B-Thinking-quantized.w4a16

license:apache-2.0

Qwen3-32B-Thinking-speculator.eagle3

license:apache-2.0

GLM-4.6-FP8-dynamic

Qwen3-30B-A3B_5.5_bits_mode_noise

gpt-oss-120b-from-qwen235b-then-self-ckpt4-speculator.eagle3