monarch-bert-base-mnli

97
license:apache-2.0
by
ykae
Embedding Model
OTHER
New
97 downloads
Early-stage
Edge AI:
Mobile
Laptop
Server
Unknown
Mobile
Laptop
Server
Quick Summary

Monarch-BERT-base: a BERT-base sequence-classification model with Monarch-structured FFN layers, fine-tuned on MNLI for natural language inference.

Code Examples

Python (transformers) — evaluate the model on the MNLI validation set:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset
from torch.utils.data import DataLoader
from tqdm import tqdm

# Run on GPU when one is available; model weights and batches are moved there.
device = "cuda" if torch.cuda.is_available() else "cpu"
model_id = "ykae/monarch-bert-base-mnli"

# trust_remote_code=True is needed because this repo ships custom modeling
# code on the Hub (Monarch FFN layers) — only enable it for repos you trust.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSequenceClassification.from_pretrained(
    model_id, 
    trust_remote_code=True
).to(device)

# Optional speedups — uncomment to enable TF32 matmuls and torch.compile:
# torch.set_float32_matmul_precision('high')
# model = torch.compile(model, mode="max-autotune")
model.eval()  # inference mode: disables dropout for deterministic evaluation

print("šŸ“Š Loading MNLI Validation set...")
dataset = load_dataset("glue", "mnli", split="validation_matched")

def tokenize_fn(ex):
    """Tokenize a batch of premise/hypothesis pairs for NLI.

    Every sequence is padded to exactly 128 tokens so the default
    DataLoader collation can stack fixed-size tensors.
    """
    premises = ex['premise']
    hypotheses = ex['hypothesis']
    return tokenizer(
        premises,
        hypotheses,
        padding="max_length",
        truncation=True,
        max_length=128,
    )

# Tokenize the whole split up front and expose fixed-width torch tensors.
tokenized_ds = dataset.map(tokenize_fn, batched=True)
tokenized_ds.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])
loader = DataLoader(tokenized_ds, batch_size=32)

n_correct, n_seen = 0, 0

print(f"šŸš€ Starting evaluation on {len(tokenized_ds)} samples...")
with torch.no_grad():  # gradients are never needed for accuracy-only evaluation
    for batch in tqdm(loader):
        input_ids = batch['input_ids'].to(device)
        attn_mask = batch['attention_mask'].to(device)
        gold = batch['label'].to(device)

        # Predicted class = index of the largest logit per example.
        logits = model(input_ids, attention_mask=attn_mask).logits
        predictions = logits.argmax(dim=1)

        n_correct += (predictions == gold).sum().item()
        n_seen += gold.size(0)

print(f"\nāœ… Evaluation Finished!")
print(f"šŸ“ˆ Accuracy: {100 * n_correct / n_seen:.2f}%")
Citation (BibTeX)
@misc{ykae-monarch-bert-mnli-2026,
  author = {Yusuf Kalyoncuoglu, YKAE-Vision},
  title = {Monarch-BERT-MNLI: Extreme Compression via Monarch FFNs},
  year = {2026},
  publisher = {Hugging Face},
  journal = {Hugging Face Model Hub},
  howpublished = {\url{https://huggingface.co/ykae/monarch-bert-base-mnli}}
}

Deploy This Model

Production-ready deployment in minutes

Together.ai

Instant API access to this model

Fastest API

Production-ready inference API. Start free, scale to millions.

Try Free API

Replicate

One-click model deployment

Easiest Setup

Run models in the cloud with simple API. No DevOps required.

Deploy Now

Disclosure: We may earn a commission from these partners. This helps keep LLMYourWay free.