nemocurator-fineweb-nemotron-4-edu-classifier
by nvidia
License: Other
286 downloads
Quick Summary
Model Overview: This is a text classification model designed to determine the educational value of a piece of text, scored on a 0-5 scale from low to high.
Code Examples
How To Use in Transformers (Python)

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

texts = ["To make lemonade, you will need lemon juice, water, and sugar."]

# Load the classifier in bfloat16 and move it to the GPU if one is available.
model = AutoModelForSequenceClassification.from_pretrained(
    "nvidia/nemocurator-fineweb-nemotron-4-edu-classifier",
    torch_dtype=torch.bfloat16,
)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

tokenizer = AutoTokenizer.from_pretrained(
    "nvidia/nemocurator-fineweb-nemotron-4-edu-classifier"
)

# Tokenize the inputs, padding to the longest text and truncating at 512 tokens.
inputs = tokenizer(
    texts,
    return_tensors="pt",
    padding="longest",
    truncation=True,
    max_length=512,
).to(device)

# Run inference; the model emits one educational-value score per text.
with torch.no_grad():
    outputs = model(**inputs)

logits = outputs.logits.squeeze(-1).float().cpu().numpy()
float_score = logits.tolist()
# Clamp the raw score to the 0-5 range and round it to an integer score.
int_score = [int(round(max(0, min(score, 5)))) for score in logits]
# Texts scoring 2.5 or above are labeled high quality.
pred_labels = ["high_quality" if score >= 2.5 else "low_quality" for score in logits]

print("Score:", float_score)
print("Rounded score:", int_score)
print("Predicted label:", pred_labels)
# Score: [1.0859375]
# Rounded score: [1]
# Predicted label: ['low_quality']
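Beyond scoring a single snippet, a classifier like this is commonly used to filter large document collections during data curation. The sketch below is illustrative and not taken from the model card: it reuses the same model and tokenizer calls as above, scores documents in small batches, and keeps only documents whose clamped, rounded score reaches a threshold. The documents list, batch size of 16, and threshold of 3 are assumptions chosen for this example.

# Illustrative sketch (assumed batch size and threshold, not from the model card).
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

MODEL_ID = "nvidia/nemocurator-fineweb-nemotron-4-edu-classifier"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_ID, torch_dtype=torch.bfloat16
).to(device)
model.eval()

# Placeholder documents; in practice this would be a large corpus.
documents = [
    "To make lemonade, you will need lemon juice, water, and sugar.",
    "Photosynthesis converts light energy into chemical energy stored in glucose.",
]

def score_batch(batch):
    # Tokenize a batch of texts and return one float score per document.
    inputs = tokenizer(
        batch,
        return_tensors="pt",
        padding="longest",
        truncation=True,
        max_length=512,
    ).to(device)
    with torch.no_grad():
        logits = model(**inputs).logits.squeeze(-1).float().cpu()
    return logits.tolist()

# Process the corpus in batches and keep documents with a rounded score >= 3.
keep = []
batch_size = 16
for start in range(0, len(documents), batch_size):
    batch = documents[start:start + batch_size]
    for doc, score in zip(batch, score_batch(batch)):
        if round(max(0.0, min(score, 5.0))) >= 3:
            keep.append((doc, score))

print(f"Kept {len(keep)} of {len(documents)} documents")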