Proto-GheyaInnov

4
by
Finisha-LLM
Language Model
OTHER
New
4 downloads
Early-stage
Edge AI:
Mobile
Laptop
Server
Unknown
Mobile
Laptop
Server
Quick Summary

AI model with specialized capabilities.

Code Examples

Inference example (Python, using the `transformers` library)
import torch
import torch.nn as nn
import math
from transformers import (
    PretrainedConfig, 
    PreTrainedModel, 
    AutoTokenizer, 
    GenerationMixin
)
from transformers.modeling_outputs import CausalLMOutputWithPast

# --- ARCHITECTURE (unchanged, for checkpoint compatibility) ---
class GheyaInnovConfig(PretrainedConfig):
    """Configuration for the GheyaInnov model.

    Args:
        vocab_size: Size of the token vocabulary (default matches GPT-2's 50257).
        hidden_size: Dimension of the token embeddings / residual stream.
        num_hidden_layers: Number of stacked transformer blocks.
        num_attention_heads: Number of attention heads; must divide hidden_size.
        intermediate_size: Width of the MLP's hidden layer in each block.
    """
    model_type = "gheya_innov"

    def __init__(
        self,
        vocab_size=50257,
        hidden_size=512,
        num_hidden_layers=8,
        num_attention_heads=8,
        intermediate_size=2048,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        # Previously hard-coded after super().__init__, which silently
        # overrode any value passed via kwargs; now real parameters with
        # the same defaults, so existing checkpoints load unchanged.
        self.num_attention_heads = num_attention_heads
        self.intermediate_size = intermediate_size

class GheyaSelfAttention(nn.Module):
    """Multi-head self-attention with a causal mask.

    Fix: the original version applied NO causal mask, so every position could
    attend to future tokens — inconsistent with this model being used as a
    causal LM (`CausalLMOutputWithPast`, `GenerationMixin.generate`). A strict
    upper-triangular mask is now applied before the softmax so position i only
    attends to positions <= i.
    """

    def __init__(self, config):
        super().__init__()
        self.num_heads = config.num_attention_heads
        # NOTE(review): assumes hidden_size is divisible by num_attention_heads.
        self.head_dim = config.hidden_size // config.num_attention_heads
        self.query = nn.Linear(config.hidden_size, config.hidden_size)
        self.key = nn.Linear(config.hidden_size, config.hidden_size)
        self.value = nn.Linear(config.hidden_size, config.hidden_size)
        self.out_proj = nn.Linear(config.hidden_size, config.hidden_size)

    def forward(self, x):
        """Apply causal multi-head attention.

        Args:
            x: float tensor of shape (batch, seq_len, hidden_size).
        Returns:
            Tensor of the same shape as ``x``.
        """
        batch, seq_len, _ = x.shape
        # Project and reshape to (batch, heads, seq_len, head_dim).
        q = self.query(x).view(batch, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
        k = self.key(x).view(batch, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
        v = self.value(x).view(batch, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
        # Scaled dot-product scores: (batch, heads, seq_len, seq_len).
        scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.head_dim)
        # Causal mask: True above the diagonal = "future", masked to -inf
        # so softmax assigns those positions zero weight.
        future = torch.triu(
            torch.ones(seq_len, seq_len, dtype=torch.bool, device=x.device), diagonal=1
        )
        scores = scores.masked_fill(future, float("-inf"))
        attn_weights = torch.softmax(scores, dim=-1)
        context = torch.matmul(attn_weights, v).transpose(1, 2).contiguous()
        return self.out_proj(context.view(batch, seq_len, -1))

class GheyaLayer(nn.Module):
    """Pre-norm transformer block: LayerNorm -> self-attention -> residual,
    then LayerNorm -> MLP -> residual."""

    def __init__(self, config):
        super().__init__()
        hidden = config.hidden_size
        self.ln1 = nn.LayerNorm(hidden)
        self.attn = GheyaSelfAttention(config)
        self.ln2 = nn.LayerNorm(hidden)
        # Two-layer feed-forward network with GELU activation.
        self.mlp = nn.Sequential(
            nn.Linear(hidden, config.intermediate_size),
            nn.GELU(),
            nn.Linear(config.intermediate_size, hidden),
        )

    def forward(self, x):
        # Residual connection around the attention sub-layer.
        x = x + self.attn(self.ln1(x))
        # Residual connection around the feed-forward sub-layer.
        return x + self.mlp(self.ln2(x))

class GheyaInnovModel(PreTrainedModel, GenerationMixin):
    """Minimal causal language model: token embeddings, a stack of
    transformer blocks, and a linear LM head producing vocabulary logits."""

    config_class = GheyaInnovConfig

    def __init__(self, config):
        super().__init__(config)
        self.embeddings = nn.Embedding(config.vocab_size, config.hidden_size)
        self.layers = nn.ModuleList(
            GheyaLayer(config) for _ in range(config.num_hidden_layers)
        )
        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size)
        self.post_init()

    def forward(self, input_ids=None, attention_mask=None, **kwargs):
        """Return next-token logits for every position.

        NOTE(review): `attention_mask` is accepted for API compatibility but
        currently ignored — padding tokens are attended to like any other.
        """
        hidden = self.embeddings(input_ids)
        for block in self.layers:
            hidden = block(hidden)
        return CausalLMOutputWithPast(logits=self.lm_head(hidden))

    def prepare_inputs_for_generation(self, input_ids, **kwargs):
        # No KV cache: re-feed the full sequence at every generation step.
        return {"input_ids": input_ids}

# --- IMPROVED INFERENCE FUNCTION ---

def generate_clean_text(repo_id, prompt):
    """Load model and tokenizer from `repo_id`, sample a completion for
    `prompt`, and return only the text after the final "Réponse:" marker.

    Args:
        repo_id: Hugging Face Hub repository id to load from.
        prompt: User question, inserted into a "Question: ... Réponse:" frame.
    Returns:
        The decoded answer string, stripped of the prompt prefix.
    """
    tokenizer = AutoTokenizer.from_pretrained(repo_id)
    model = GheyaInnovModel.from_pretrained(repo_id)
    model.eval()

    encoded = tokenizer(f"Question: {prompt} Réponse:", return_tensors="pt")

    sampling_config = dict(
        max_new_tokens=60,
        do_sample=True,          # enable sampling instead of greedy decoding
        temperature=0.5,         # lower = more stable / less creative
        top_k=40,                # keep only the 40 most likely tokens
        top_p=0.85,              # nucleus sampling for coherence
        repetition_penalty=1.5,  # discourage the model from stuttering
        no_repeat_ngram_size=3,  # forbid repeating any 3-token sequence
        pad_token_id=tokenizer.eos_token_id,
    )
    with torch.no_grad():
        output_ids = model.generate(**encoded, **sampling_config)

    decoded = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return decoded.split("Réponse:")[-1].strip()

if __name__ == "__main__":
    # Update this to point at your own Hub repository.
    REPO = "Finisha-LLM/Proto-GheyaInnov"
    answer = generate_clean_text(REPO, "Qui es-tu (générer du texte avec le Proto-GheyaInnov) ?")
    print("Réponse :", answer)

Deploy This Model

Production-ready deployment in minutes

Together.ai

Instant API access to this model

Fastest API

Production-ready inference API. Start free, scale to millions.

Try Free API

Replicate

One-click model deployment

Easiest Setup

Run models in the cloud with simple API. No DevOps required.

Deploy Now

Disclosure: We may earn a commission from these partners. This helps keep LLMYourWay free.