Nekoly
33
1
license:mit
by
Clemylia
Language Model
OTHER
New
33 downloads
Early-stage
Edge AI:
Mobile
Laptop
Server
Unknown
Mobile
Laptop
Server
Quick Summary
AI model with specialized capabilities.
Code Examples
Importations des librairies nécessaires pour le chargement (text, pytorch)
import torch
import torch.nn as nn
import torch.nn.functional as F
import json
import os
import collections
import heapq
# Importations des librairies nécessaires pour le chargement
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file as load_safetensors_file
# --- A. AricateAttentionLayer (Inchangé) ---
class AricateAttentionLayer(nn.Module):
    """Additive (Bahdanau-style) attention over a sequence of RNN outputs.

    Every time step of the RNN output is scored against a query state; the
    softmax of those scores weights a sum over the time steps.
    """

    def __init__(self, hidden_dim):
        super().__init__()
        # W projects the per-step outputs, U projects the query state, and V
        # reduces the combined energy to a single score per time step.
        self.W = nn.Linear(hidden_dim, hidden_dim)
        self.U = nn.Linear(hidden_dim, hidden_dim)
        self.V = nn.Linear(hidden_dim, 1, bias=False)

    def forward(self, rnn_outputs, last_hidden):
        """Return the attention-weighted sum of ``rnn_outputs``.

        The indexing used here implies ``rnn_outputs`` is 3-D with time on
        axis 1 and features on axis 2, and ``last_hidden`` is 2-D; the result
        has the time axis summed away.
        """
        # Insert a length-1 time axis so the query broadcasts over all steps.
        query = self.U(last_hidden.unsqueeze(1))
        keys = self.W(rnn_outputs)
        scores = self.V(torch.tanh(keys + query)).squeeze(2)
        # Normalise across time, then restore the feature axis for broadcasting.
        weights = F.softmax(scores, dim=1).unsqueeze(2)
        return (rnn_outputs * weights).sum(dim=1)
# --- B. AricateModel (Inchangé) ---
class AricateModel(nn.Module):
    """Aricate V4 network: embedding -> GRU -> additive attention -> vocabulary logits.

    When ``config`` is given, its "vocab_size", "embedding_dim", "hidden_dim"
    and "num_layers" entries take precedence over the matching arguments;
    keys absent from ``config`` fall back to the argument values.
    """

    def __init__(self, vocab_size: int, embedding_dim: int, hidden_dim: int, num_layers: int = 1, config: dict = None):
        super().__init__()

        # Start from the explicit arguments, then let the config override them.
        settings = {
            "vocab_size": vocab_size,
            "embedding_dim": embedding_dim,
            "hidden_dim": hidden_dim,
            "num_layers": num_layers,
        }
        if config is not None:
            for key, fallback in list(settings.items()):
                settings[key] = config.get(key, fallback)

        self.vocab_size = settings["vocab_size"]
        self.embedding_dim = settings["embedding_dim"]
        self.hidden_dim = settings["hidden_dim"]
        self.num_layers = settings["num_layers"]

        # Index 0 is the padding entry of the embedding table.
        self.word_embeddings = nn.Embedding(
            num_embeddings=self.vocab_size,
            embedding_dim=self.embedding_dim,
            padding_idx=0,
        )
        self.rnn = nn.GRU(
            input_size=self.embedding_dim,
            hidden_size=self.hidden_dim,
            num_layers=self.num_layers,
            batch_first=True,
        )
        self.attention = AricateAttentionLayer(self.hidden_dim)
        # Input is twice hidden_dim because forward() concatenates the
        # attention context with the last hidden state.
        self.hidden_to_vocab = nn.Linear(self.hidden_dim * 2, self.vocab_size)

    def forward(self, input_words):
        """Return one vector of vocabulary logits per input sequence."""
        embedded = self.word_embeddings(input_words)
        outputs, final_states = self.rnn(embedded)
        # Final hidden state of the topmost GRU layer.
        top_state = final_states[-1]
        context = self.attention(outputs, top_state)
        return self.hidden_to_vocab(torch.cat((context, top_state), dim=1))
# --- C. WordTokenizer (Inchangé) ---
class WordTokenizer:
    """Whitespace word-level tokenizer rebuilt from a published word -> id vocabulary."""

    def __init__(self, word_to_id: dict):
        self.word_to_id = word_to_id
        # Reverse mapping used by decode().
        self.id_to_word = {token_id: token for token, token_id in word_to_id.items()}
        self.vocab_size = len(word_to_id)
        # All four special tokens must be present; a missing one raises KeyError.
        self.special_tokens = {
            name: word_to_id[name]
            for name in ('<pad>', '<unk>', '<eos>', '<sep>')
        }

    def encode(self, text, add_eos=False):
        """Lower-case ``text``, split on whitespace and map each word to its id.

        Words not in the vocabulary map to the '<unk>' id. When ``add_eos``
        is true, the '<eos>' id is appended.
        """
        tokens = text.lower().split()
        if add_eos:
            tokens.append('<eos>')
        vocab = self.word_to_id
        return [vocab.get(token, vocab['<unk>']) for token in tokens]

    def decode(self, ids):
        """Join the words for ``ids`` with spaces, dropping special tokens.

        Ids not in the vocabulary are treated as '<unk>' and therefore
        dropped as well.
        """
        hidden = ('<pad>', '<unk>', '<eos>', '<sep>')
        visible = []
        for token_id in ids:
            token = self.id_to_word.get(token_id, '<unk>')
            if token not in hidden:
                visible.append(token)
        return " ".join(visible)
# --- D. Fonction de Génération (MODIFIÉE pour Top-K Sampling et Temperature) ---
def generate_sequence(model, tokenizer, question, max_length, max_len_input, temperature=1.0, top_k=None):
    """Sample an answer to ``question`` one token at a time (temperature + top-k).

    The prompt is ``tokenizer.encode(question)`` followed by the ``<sep>`` id.
    At each step the last ``max_len_input`` ids are left-padded with ``<pad>``
    up to ``max_len_input``, passed to ``model``, and the next id is sampled
    from the softmax of the (optionally rescaled and filtered) logits.
    Generation stops after ``max_length`` tokens or as soon as ``<eos>`` is
    sampled. The question and the decoded answer are printed.

    Args:
        model: ``nn.Module`` called as ``model(ids)`` with a tensor of shape
            ``(1, len)``; its output, after ``squeeze(0)``, is used as a 1-D
            vector of vocabulary logits.
        tokenizer: object exposing ``special_tokens`` (with '<pad>', '<eos>',
            '<sep>'), ``encode(text)`` and ``decode(ids)``.
        question (str): prompt text.
        max_length (int): maximum number of tokens to generate.
        max_len_input (int): size of the model's input window.
        temperature (float): logits are divided by this value when it is
            positive and different from 1.0; non-positive values are ignored.
        top_k (int/None): sample only among the ``top_k`` highest logits.
            ``None`` or a non-positive value disables the filter; values
            larger than the vocabulary are clamped to its size.

    Returns:
        str: the decoded generated tokens (``<eos>`` excluded).
    """
    model.eval()

    specials = tokenizer.special_tokens
    sep_id = specials['<sep>']
    eos_id = specials['<eos>']
    pad_id = specials['<pad>']

    question_ids = tokenizer.encode(question)
    current_sequence = question_ids + [sep_id]
    # Everything after this offset is generated text. Searching for the first
    # <sep> instead would mis-split questions that themselves contain '<sep>'.
    prompt_length = len(current_sequence)

    # 0 (or a negative value) is reported as "désactivé" below, so treat it as
    # disabled here too instead of calling topk with k=0.
    use_top_k = top_k is not None and top_k > 0

    # Build inputs on the same device as the model's weights.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    print(f"\n--- Q/A Génération (Sampling | T={temperature:.2f} | K={top_k if top_k else 'désactivé'}) ---")
    print(f"Question: '{question}'")

    with torch.no_grad():
        for _ in range(max_length):
            # Keep the most recent window and left-pad it to a fixed length.
            window = current_sequence[-max_len_input:]
            padded = [pad_id] * (max_len_input - len(window)) + window
            input_tensor = torch.tensor(padded, device=device).unsqueeze(0)

            # 1. Logits for the next token.
            logits = model(input_tensor).squeeze(0)

            # 2. Temperature scaling.
            if temperature > 0 and temperature != 1.0:
                logits = logits / temperature

            # 3. Top-k filtering: every logit outside the top k becomes -inf.
            if use_top_k:
                # topk raises if k exceeds the vocabulary size.
                k = min(top_k, logits.size(-1))
                values, indices = torch.topk(logits, k=k)
                logits = torch.full_like(logits, float('-inf')).scatter(0, indices, values)

            # 4. Softmax already yields a normalised distribution (filtered
            #    entries get probability 0), so it can be sampled directly.
            probabilities = F.softmax(logits, dim=-1)
            predicted_id = torch.multinomial(probabilities, num_samples=1).item()

            # 5. Extend the sequence; stop on end-of-sequence.
            current_sequence.append(predicted_id)
            if predicted_id == eos_id:
                break

    # 6. Decode only the generated part, without the <eos> marker.
    response_ids = [token_id for token_id in current_sequence[prompt_length:] if token_id != eos_id]
    final_response = tokenizer.decode(response_ids)

    print(f"Réponse générée: '{final_response}'")
    print("-" * 40)
    return final_response
# --- E. Fonction de Chargement du Modèle Lam-2 (Inchangée) ---
def load_lam2_model(repo_id: str):
    """Download and load the Lam-2 model and its tokenizer from Hugging Face.

    Three files are fetched from ``repo_id`` with ``hf_hub_download``:
    "aricate_tokenizer.txt" (parsed as a JSON word -> id mapping),
    "config.json" (model hyper-parameters) and "model.safetensors" (weights).

    Args:
        repo_id (str): Hugging Face repository identifier.

    Returns:
        tuple: ``(model, tokenizer, max_len_input)`` where ``max_len_input``
        is the fixed default of 30.
    """
    print(f"--- Chargement de Lam-2 depuis {repo_id} ---")

    # 1. Tokenizer: the vocabulary file is JSON despite its .txt extension.
    tokenizer_path = hf_hub_download(repo_id=repo_id, filename="aricate_tokenizer.txt")
    with open(tokenizer_path, 'r', encoding='utf-8') as f:
        word_to_id = json.load(f)
    tokenizer = WordTokenizer(word_to_id)
    print(f"Tokenizer chargé. Taille du vocabulaire: {tokenizer.vocab_size}")

    # 2. Configuration. Read as UTF-8 explicitly, like the tokenizer file,
    #    rather than relying on the platform's default encoding.
    config_path = hf_hub_download(repo_id=repo_id, filename="config.json")
    with open(config_path, 'r', encoding='utf-8') as f:
        model_config = json.load(f)
    print("Configuration du modèle chargée.")

    # 3. Build the model; the config is passed as well so that optional
    #    entries such as "num_layers" are picked up by AricateModel.
    model = AricateModel(
        vocab_size=model_config['vocab_size'],
        embedding_dim=model_config['embedding_dim'],
        hidden_dim=model_config['hidden_dim'],
        config=model_config
    )

    # 4. Download the Safetensors weights and load them into the model.
    weights_path = hf_hub_download(repo_id=repo_id, filename="model.safetensors")
    state_dict = load_safetensors_file(weights_path)
    model.load_state_dict(state_dict)
    print("Poids du modèle Safetensors chargés avec succès.")

    MAX_LEN_INPUT_DEFAULT = 30
    print("-" * 40)
    return model, tokenizer, MAX_LEN_INPUT_DEFAULT
# --- F. Bloc principal d'exécution (MISE À JOUR) ---
if __name__ == '__main__':
    LAM2_REPO_ID = "Clemylia/Nekoly"
    MAX_GENERATION_LENGTH = 15

    # Sampling settings used for this test run.
    TEST_TEMPERATURE = 0.9  # below 1.0 = more cautious sampling; above 1.0 = more random
    TEST_TOP_K = 10  # sample only among the 10 most probable words

    test_questions = [
        "Quelia eta la architectia di tu, Nekoly ?",
        "Coma apalla la capitalia di la Francellia ?",
        "Combiania di journia avst la annia lunallia ?",
        "Quelia estimenta creaea la pluviay ?",
        "Riel avst ecrita *La Odyssellia* ?",
    ]

    try:
        # 1. Load the model, its tokenizer and the input window size.
        lam2_model, lam2_tokenizer, max_len_input = load_lam2_model(LAM2_REPO_ID)

        print(f"\n>>> TEST D'INFÉRENCE LAM-2 EN MODE CRÉATIF (T={TEST_TEMPERATURE}, K={TEST_TOP_K}) <<<")

        # 2. Run sampling-based generation on every test question.
        sampling_options = {
            "max_length": MAX_GENERATION_LENGTH,
            "max_len_input": max_len_input,
            "temperature": TEST_TEMPERATURE,
            "top_k": TEST_TOP_K,
        }
        for question in test_questions:
            generate_sequence(
                model=lam2_model,
                tokenizer=lam2_tokenizer,
                question=question,
                **sampling_options,
            )
    except Exception as error:
        # Top-level boundary of the script: report the failure and exit normally.
        print("\n❌ Une erreur est survenue lors du chargement ou de l'inférence.")
        print(f"Détail de l'erreur: {error}")
        print("Vérifiez l'installation des dépendances et le REPO_ID.")
Production-ready deployment in minutes
Together.ai
Instant API access to this model
Production-ready inference API. Start free, scale to millions.
Try Free API
Replicate
One-click model deployment
Run models in the cloud with simple API. No DevOps required.
Deploy Now
Disclosure: We may earn a commission from these partners. This helps keep LLMYourWay free.