MERaLiON-SER-v1
814
2
7 languages
—
by
MERaLiON
Embedding Model
OTHER
New
814 downloads
Early-stage
Edge AI:
Mobile
Laptop
Server
Unknown
Mobile
Laptop
Server
Quick Summary
Speech emotion recognition (SER) model: classifies audio into 7 emotion categories (Neutral, Happy, Sad, Angry, Fearful, Disgusted, Surprised) and predicts continuous valence/arousal/dominance scores.
Code Examples
⚙️ Usage Examples (Python, `transformers`)
# MERaLiON-SER-v1 usage example: single-file and batch speech emotion
# recognition. Outputs a categorical emotion plus valence/arousal/dominance
# (VAD) dimensional scores.
from transformers import AutoProcessor, AutoModelForAudioClassification
import torch, torchaudio

repo = "MERaLiON/MERaLiON-SER-v1"
device = "cuda" if torch.cuda.is_available() else "cpu"

processor = AutoProcessor.from_pretrained(repo)
# trust_remote_code=True: the repo ships custom modeling code that is
# downloaded and executed — only use with sources you trust.
model = AutoModelForAudioClassification.from_pretrained(repo, trust_remote_code=True).to(device)
model.eval()

# ------- Single wav example --------------
wav, sr = torchaudio.load("sample.wav")
if wav.shape[0] > 1:
    wav = wav.mean(dim=0, keepdim=True)  # downmix multi-channel to mono
wav = torchaudio.transforms.Resample(sr, 16000)(wav)  # model expects 16 kHz
inputs = processor(wav.squeeze().numpy(), sampling_rate=16000, return_tensors="pt", return_attention_mask=True)
with torch.inference_mode():
    # Forward only the tensors the model consumes, moved to the target device.
    out = model(**{k: v.to(device) for k, v in inputs.items() if k in ("input_features", "attention_mask")})
logits, dims = out["logits"], out["dims"]
emo_idx = torch.argmax(logits, dim=1).item()
emo_map = ["Neutral", "Happy", "Sad", "Angry", "Fearful", "Disgusted", "Surprised"]
print("Predicted Emotion:", emo_map[emo_idx])
print("Valence/Arousal/Dominance:", dims.squeeze().tolist())

# -------- Batch inference example using the wav file loaded above --------
wav = wav.squeeze().numpy()  # 1-D array of shape (samples,)
wavs = [wav, wav, wav]       # example batch of 3 waveforms
batch_inputs = processor(
    wavs,  # list of 1-D numpy arrays
    sampling_rate=16000,
    return_tensors="pt",
    padding="max_length",
    return_attention_mask=True,
)
with torch.inference_mode():
    out = model(**{k: v.to(device) for k, v in batch_inputs.items() if k in ("input_features", "attention_mask")})
logits, dims = out["logits"], out["dims"]  # logits: (B, 7), dims: (B, 3) where B is batch size
emo_ids = torch.argmax(logits, dim=1).tolist()
vads = dims.tolist()  # convert the tensor once, not once per loop iteration
for i, (eid, vad) in enumerate(zip(emo_ids, vads)):
    print(f"Batch Index {i} -> {emo_map[eid]} | VAD={vad}")

# 🔹 CPU Inference (see next example)
# MERaLiON-SER-v1 CPU-only inference example using soundfile for audio I/O.
from transformers import AutoProcessor, AutoModelForAudioClassification
import torch, soundfile as sf, torchaudio

repo = "MERaLiON/MERaLiON-SER-v1"
processor = AutoProcessor.from_pretrained(repo)
# trust_remote_code=True downloads and runs the repo's custom modeling code.
model = AutoModelForAudioClassification.from_pretrained(repo, trust_remote_code=True).cpu().eval()

wav, sr = sf.read("sample.wav")
if wav.ndim > 1:
    wav = wav.mean(axis=1)  # downmix multi-channel to mono
if sr != 16000:
    # Resample to the model's expected 16 kHz rate.
    wav = torchaudio.functional.resample(torch.tensor(wav).unsqueeze(0), sr, 16000).squeeze(0).numpy()

inputs = processor(wav, sampling_rate=16000, return_tensors="pt")
with torch.inference_mode():
    out = model(**inputs)
logits, dims = out["logits"], out["dims"]
emo_idx = torch.argmax(logits, dim=1).item()
emo_map = ["Neutral", "Happy", "Sad", "Angry", "Fearful", "Disgusted", "Surprised"]
print("Predicted Emotion:", emo_map[emo_idx])
print("Valence/Arousal/Dominance:", dims.squeeze().tolist())

# Deploy This Model (see deployment options below)
Production-ready deployment in minutes
Together.ai
Instant API access to this model
Production-ready inference API. Start free, scale to millions.
Try Free API

Replicate
One-click model deployment
Run models in the cloud with simple API. No DevOps required.
Deploy Now

Disclosure: We may earn a commission from these partners. This helps keep LLMYourWay free.