Nemotron-nano-9b-fp8
by weathermanj · Language Model · License: Other · 9.0B params · 1 language
438 downloads · New · Early-stage
Quick Summary
An FP8-quantized build of NVIDIA Nemotron-Nano-9B-v2, a 9B-parameter hybrid Mamba-Transformer language model, packaged for lower-memory inference.
Device Compatibility
  Mobile:  4-6GB RAM
  Laptop:  16GB RAM
  Server:  GPU
Minimum recommended: 9GB+ RAM
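As a rough sanity check on these figures: FP8 stores one byte per parameter, so the weights alone occupy roughly 8.4 GiB, which is where the 9GB+ minimum comes from; KV cache and runtime overhead add to that. A back-of-the-envelope sketch (my arithmetic, not a measurement):

# Rough weight-memory estimate for a 9B-parameter FP8 model.
# Ignores KV cache, activations, and runtime overhead.
params = 9.0e9        # parameter count
bytes_per_param = 1   # FP8 = 1 byte per weight
weights_gib = params * bytes_per_param / 1024**3
print(f"~{weights_gib:.1f} GiB of weights")  # ~8.4 GiB -> 9GB+ RAM minimum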
Code Examples
Usage (Python, vLLM)

from vllm import LLM, SamplingParams

# Load the FP8 quantized model
model = LLM(
    model="weathermanj/nemotron-nano-9b-fp8",
    trust_remote_code=True,
    dtype="auto",  # auto-detects the FP8 format
)

# Generate completions for a batch of prompts
prompts = ["Explain the benefits of hybrid Mamba-Transformer architectures."]
sampling_params = SamplingParams(
    temperature=0.7,
    top_p=0.9,
    max_tokens=256,
)

outputs = model.generate(prompts, sampling_params)
for output in outputs:
    print(output.outputs[0].text)
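For token streaming or multi-client use, vLLM can also serve the model behind its OpenAI-compatible HTTP server. A minimal sketch, assuming a recent vLLM where the server has been started locally (e.g. `vllm serve weathermanj/nemotron-nano-9b-fp8 --trust-remote-code`) and the repository provides a chat template:

from openai import OpenAI

# Point the standard OpenAI client at the local vLLM server.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

# Stream tokens as they are generated.
stream = client.chat.completions.create(
    model="weathermanj/nemotron-nano-9b-fp8",
    messages=[{"role": "user", "content": "Summarize FP8 quantization in two sentences."}],
    temperature=0.7,
    max_tokens=256,
    stream=True,
)
for chunk in stream:
    delta = chunk.choices[0].delta.content
    if delta:
        print(delta, end="", flush=True)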
Alternative: Transformers (Python)
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load the quantized model
model = AutoModelForCausalLM.from_pretrained(
    "weathermanj/nemotron-nano-9b-fp8",
    trust_remote_code=True,
    torch_dtype="auto",
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("weathermanj/nemotron-nano-9b-fp8")

# Generate text (inputs must live on the same device as the model)
inputs = tokenizer("How does FP8 quantization improve AI efficiency?", return_tensors="pt").to(model.device)
with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=200, do_sample=True, temperature=0.7)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)
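Since the upstream Nemotron-Nano-9B-v2 is an instruction-tuned model, prompts are usually better formatted through the tokenizer's chat template than passed as raw text. A minimal sketch reusing the `model` and `tokenizer` loaded above, assuming the repository ships a chat template:

# Format a conversation with the model's chat template (if one is defined).
messages = [
    {"role": "user", "content": "How does FP8 quantization improve AI efficiency?"},
]
input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,  # append the assistant-turn header
    return_tensors="pt",
).to(model.device)

with torch.no_grad():
    outputs = model.generate(input_ids, max_new_tokens=200, do_sample=True, temperature=0.7)

# Decode only the newly generated tokens, not the prompt.
response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)
print(response)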
Citation (BibTeX)
@software{nemotron_fp8_quantized,
  title = {NVIDIA-Nemotron-Nano-9B-v2-FP8: Efficient FP8 Quantization},
  author = {jwjohns},
  organization = {Emendat.io},
  year = {2025},
  url = {https://huggingface.co/weathermanj/nvidia-nemotron-nano-9b-v2-fp8},
  note = {FP8 quantized version of NVIDIA Nemotron-Nano-9B-v2}
}

@article{nvidia2024nemotron,
  title = {Nemotron-4 Technical Report},
  author = {NVIDIA},
  year = {2024},
  url = {https://huggingface.co/nvidia/NVIDIA-Nemotron-Nano-9B-v2}
}
Model Lineage
nvidia/NVIDIA-Nemotron-Nano-9B-v2 (Base Model)
  ↓ FP8 quantization by jwjohns
weathermanj/nvidia-nemotron-nano-9b-v2-fp8 (This Model)
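The card does not say how the FP8 conversion was performed. For context, here is a sketch of one common recipe for producing vLLM-compatible FP8 checkpoints with the llm-compressor library; the FP8_DYNAMIC scheme, the ignored `lm_head`, and the import paths are my assumptions, not the author's documented process:

from llmcompressor.modifiers.quantization import QuantizationModifier
from llmcompressor.transformers import oneshot  # import path differs across llm-compressor versions
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "nvidia/NVIDIA-Nemotron-Nano-9B-v2",
    trust_remote_code=True,
    torch_dtype="auto",
)

# FP8 weights with dynamic per-token FP8 activation scales for all
# Linear layers, leaving the output head in higher precision.
recipe = QuantizationModifier(
    targets="Linear",
    scheme="FP8_DYNAMIC",
    ignore=["lm_head"],
)

oneshot(model=model, recipe=recipe)
model.save_pretrained("nemotron-nano-9b-fp8", save_compressed=True)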
Usage Tracking (BibTeX)
@software{nvidia_nemotron_fp8,
  title = {NVIDIA-Nemotron-Nano-9B-v2-FP8},
  author = {jwjohns},
  organization = {Emendat.io},
  year = {2025},
  url = {https://huggingface.co/weathermanj/Nemotron-nano-9b-fp8},
  note = {FP8 quantized version of NVIDIA Nemotron-Nano-9B-v2},
  baseModel = {nvidia/NVIDIA-Nemotron-Nano-9B-v2}
}

@article{nvidia2024nemotron,
  title = {Nemotron-4 Technical Report},
  author = {NVIDIA},
  year = {2024},
  url = {https://huggingface.co/nvidia/NVIDIA-Nemotron-Nano-9B-v2}
}