NVIDIA-Nemotron-Nano-12B-v2

Name: NVIDIA-Nemotron-Nano-12B-v2
Author: nvidia

17.9K

132

131K

Long context

12.0B

6 languages

—

nvidia

Language Model

OTHER

12B params

Fair

18K downloads

Community-tested

Try on Hugging Face Add to Compare

Edge AI:

Mobile

Laptop

Server

27GB+ RAM

Mobile

Laptop

Server

Quick Summary

--- license: other license_name: nvidia-open-model-license license_link: >- https://www.

Device Compatibility

Mobile

4-6GB RAM

Laptop

16GB RAM

Server

GPU

Minimum Recommended

12GB+ RAM

Code Examples

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

**Use it with vLLM**bashvllm

pip install -U "vllm>=0.10.1"

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

bashvllm

vllm serve nvidia/NVIDIA-Nemotron-Nano-12B-v2 \
    --trust-remote-code \
    --max-num-seqs 64 \
    --mamba_ssm_cache_dtype float32

Deploy This Model

Production-ready deployment in minutes

Together.ai

Instant API access to this model

Fastest API

Production-ready inference API. Start free, scale to millions.

Try Free API

Replicate

One-click model deployment

Easiest Setup

Run models in the cloud with a simple API. No DevOps required.

Deploy Now

Disclosure: We may earn a commission from these partners. This helps keep LLMYourWay free.