Proto-GheyaInnov
4
—
by
Finisha-LLM
Language Model
OTHER
New
4 downloads
Early-stage
Edge AI:
Mobile
Laptop
Server
Unknown
Mobile
Laptop
Server
Quick Summary
AI model with specialized capabilities.
Code Examples
✨ Inference example (Python, using the `transformers` library)
import torch
import torch.nn as nn
import math
from transformers import (
PretrainedConfig,
PreTrainedModel,
AutoTokenizer,
GenerationMixin
)
from transformers.modeling_outputs import CausalLMOutputWithPast
# --- ARCHITECTURE (unchanged, to stay compatible with the published checkpoint) ---
class GheyaInnovConfig(PretrainedConfig):
    """Configuration for the Proto-GheyaInnov causal language model.

    Head count (8) and MLP width (2048) are fixed to match the published
    checkpoint; only vocab size, hidden size and depth are configurable.
    """

    model_type = "gheya_innov"

    def __init__(self, vocab_size=50257, hidden_size=512, num_hidden_layers=8, **kwargs):
        super().__init__(**kwargs)
        # Tunable dimensions.
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        # Fixed by the checkpoint's architecture.
        self.num_attention_heads = 8
        self.intermediate_size = 2048
class GheyaSelfAttention(nn.Module):
    """Multi-head self-attention over the full sequence.

    NOTE(review): no causal mask is applied even though this model is used
    as a causal LM — presumably the checkpoint was trained this way (the
    file keeps the architecture "unchanged for compatibility"); confirm
    before adding a mask, since doing so would change the outputs.
    """

    def __init__(self, config):
        super().__init__()
        self.num_heads = config.num_attention_heads
        self.head_dim = config.hidden_size // config.num_attention_heads
        # Projection names must match the checkpoint's state dict.
        self.query = nn.Linear(config.hidden_size, config.hidden_size)
        self.key = nn.Linear(config.hidden_size, config.hidden_size)
        self.value = nn.Linear(config.hidden_size, config.hidden_size)
        self.out_proj = nn.Linear(config.hidden_size, config.hidden_size)

    def forward(self, x):
        """Return attended features with the same shape as ``x``."""
        bsz, n_tok, _ = x.shape

        def split_heads(t):
            # (batch, seq, hidden) -> (batch, heads, seq, head_dim)
            return t.view(bsz, n_tok, self.num_heads, self.head_dim).transpose(1, 2)

        q = split_heads(self.query(x))
        k = split_heads(self.key(x))
        v = split_heads(self.value(x))

        # Scaled dot-product attention, unmasked.
        weights = torch.softmax(q @ k.transpose(-2, -1) / math.sqrt(self.head_dim), dim=-1)
        merged = (weights @ v).transpose(1, 2).contiguous().view(bsz, n_tok, -1)
        return self.out_proj(merged)
class GheyaLayer(nn.Module):
    """Pre-norm transformer block: LayerNorm -> attention and LayerNorm -> MLP,
    each wrapped in a residual connection."""

    def __init__(self, config):
        super().__init__()
        # Submodule names must match the checkpoint's state dict.
        self.ln1 = nn.LayerNorm(config.hidden_size)
        self.attn = GheyaSelfAttention(config)
        self.ln2 = nn.LayerNorm(config.hidden_size)
        self.mlp = nn.Sequential(
            nn.Linear(config.hidden_size, config.intermediate_size),
            nn.GELU(),
            nn.Linear(config.intermediate_size, config.hidden_size),
        )

    def forward(self, hidden):
        """Apply both sub-blocks; shape of ``hidden`` is preserved."""
        hidden = hidden + self.attn(self.ln1(hidden))
        return hidden + self.mlp(self.ln2(hidden))
class GheyaInnovModel(PreTrainedModel, GenerationMixin):
    """Minimal causal-LM stack: token embedding -> N GheyaLayers -> LM head.

    NOTE(review): ``forward`` accepts but ignores ``attention_mask`` and
    returns no KV cache, so ``generate`` recomputes the whole prefix at
    every decoding step.
    """

    config_class = GheyaInnovConfig

    def __init__(self, config):
        super().__init__(config)
        # Module names must match the checkpoint's state dict.
        self.embeddings = nn.Embedding(config.vocab_size, config.hidden_size)
        self.layers = nn.ModuleList(
            GheyaLayer(config) for _ in range(config.num_hidden_layers)
        )
        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size)
        self.post_init()

    def forward(self, input_ids=None, attention_mask=None, **kwargs):
        """Run the full stack and return per-position logits."""
        hidden = self.embeddings(input_ids)
        for block in self.layers:
            hidden = block(hidden)
        return CausalLMOutputWithPast(logits=self.lm_head(hidden))

    def prepare_inputs_for_generation(self, input_ids, **kwargs):
        # Stateless generation: feed the entire prefix on every step.
        return {"input_ids": input_ids}
# --- IMPROVED INFERENCE FUNCTION ---
def generate_clean_text(repo_id, prompt):
    """Download the model/tokenizer from ``repo_id`` and answer ``prompt``.

    The prompt is wrapped in a "Question: ... Réponse:" template; a sampled
    completion is decoded and only the text after the last "Réponse:" marker
    is returned.
    """
    tokenizer = AutoTokenizer.from_pretrained(repo_id)
    model = GheyaInnovModel.from_pretrained(repo_id)
    model.eval()

    encoded = tokenizer(f"Question: {prompt} Réponse:", return_tensors="pt")
    with torch.no_grad():
        generated = model.generate(
            **encoded,
            max_new_tokens=60,
            do_sample=True,           # enable sampling
            temperature=0.5,          # lower = more stable / less creative
            top_k=40,                 # keep only the 40 most likely tokens (cuts noise)
            top_p=0.85,               # nucleus sampling for coherence
            repetition_penalty=1.5,   # keeps the model from stuttering
            no_repeat_ngram_size=3,   # forbid repeating any 3-token group
            pad_token_id=tokenizer.eos_token_id,
        )

    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    return decoded.split("Réponse:")[-1].strip()
if __name__ == "__main__":
    REPO = "Finisha-LLM/Proto-GheyaInnov"  # update to point at your own repo
    question = "Qui es-tu (générer du texte avec le Proto-GheyaInnov) ?"
    print("Réponse :", generate_clean_text(REPO, question))
Production-ready deployment in minutes
Together.ai
Instant API access to this model
Production-ready inference API. Start free, scale to millions.
Try Free API
Replicate
One-click model deployment
Run models in the cloud with simple API. No DevOps required.
Deploy Now
Disclosure: We may earn a commission from these partners. This helps keep LLMYourWay free.