GigaChat3.1-10B-A1.8B-bf16

539
9
license:mit
by
ai-sage
Language Model
OTHER
10B params
New
539 downloads
Early-stage
Edge AI:
Mobile
Laptop
Server
23GB+ RAM
Mobile
Laptop
Server
Quick Summary

GigaChat3.1-10B-A1.8B is a 10B-parameter language model by ai-sage, distributed in bf16 under the MIT license.

Device Compatibility

Mobile
4-6GB RAM
Laptop
16GB RAM
Server
GPU
Minimum Recommended
10GB+ RAM

Code Examples

Usage Example (Python, transformers)
# Minimal chat-completion example: load the model in bfloat16, render a chat
# prompt with the model's template, generate, and print only the new tokens.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig

model_name = "ai-sage/GigaChat3.1-10B-A1.8B-bf16"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name, torch_dtype=torch.bfloat16, device_map="auto"
)
model.generation_config = GenerationConfig.from_pretrained(model_name)

chat = [{"role": "user", "content": "Докажи теорему о неподвижной точке"}]

# Build the text prompt via the chat template, with the generation prompt appended.
rendered = tokenizer.apply_chat_template(
    chat, tokenize=False, add_generation_prompt=True
)
encoded = tokenizer(rendered, return_tensors="pt")
encoded = {key: tensor.to(model.device) for key, tensor in encoded.items()}

generated = model.generate(**encoded, max_new_tokens=1000)

# Decode only the completion: skip the prompt tokens at the front.
prompt_len = encoded["input_ids"].shape[1]
result = tokenizer.decode(generated[0][prompt_len:], skip_special_tokens=True)
print(result)
Example request to a local OpenAI-compatible server on port 8000 (bash):
curl http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "ai-sage/GigaChat3.1-10B-A1.8B-bf16",
    "messages": [
      {
        "role": "user",
        "content": "Докажи теорему о неподвижной точке"
      }
    ],
    "max_tokens": 400,
    "temperature": 0
  }'
Example request to a local OpenAI-compatible server on port 30000 (bash):
curl http://localhost:30000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "ai-sage/GigaChat3.1-10B-A1.8B-bf16",
    "messages": [
      {
        "role": "user",
        "content": "Докажи теорему о неподвижной точке"
      }
    ],
    "max_tokens": 1000,
    "temperature": 0
  }'
Function Calling Example (Python, transformers)
import torch
import json
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig

FUNCTION_CALL_TOKEN = "<|function_call|>"

def parse_function_and_content(completion_str: str):
    """Split a raw model completion into (function_call, content).

    Returns a 2-tuple where ``function_call`` is the parsed tool-call dict
    (must contain "name" and a dict-valued "arguments") or None, and
    ``content`` is the plain-text portion of the completion, or None when
    that portion is empty.
    """
    text = completion_str.strip()

    # No tool-call marker: the whole completion is plain content.
    if FUNCTION_CALL_TOKEN not in text:
        return None, (text if text else None)

    before, after = text.split(FUNCTION_CALL_TOKEN, 1)
    stripped_before = before.strip()
    content = stripped_before if stripped_before else None
    call_text = after.strip()

    # Trim trailing sequence markers that decoding may leave on the payload.
    for marker in ("</s>", "<s>"):
        if call_text.endswith(marker):
            call_text = call_text[: -len(marker)].strip()

    try:
        parsed = json.loads(call_text)
    except json.JSONDecodeError:
        # Unparseable payload: fall back to the plain content, or the whole
        # (stripped) completion when there was no content before the marker.
        return None, content if content is not None else text

    is_valid_call = (
        isinstance(parsed, dict)
        and "name" in parsed
        and "arguments" in parsed
        and isinstance(parsed["arguments"], dict)
    )
    if not is_valid_call:
        return None, content

    return parsed, content


model_name = "ai-sage/GigaChat3.1-10B-A1.8B-bf16"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
model.generation_config = GenerationConfig.from_pretrained(model_name)

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Получить информацию о текущей погоде в указанном городе.",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {
                        "type": "string",
                        "description": "Название города (например, Москва, Казань)."
                    }
                },
                "required": ["city"]
            }
        }
    }
]

messages = [
    {"role": "user", "content": "Какая сейчас погода в Москве?"}
]

prompt = tokenizer.apply_chat_template(
    messages,
    tools=tools,
    tokenize=False,
    add_generation_prompt=True,
)

inputs = tokenizer(prompt, return_tensors="pt")
inputs = {k: v.to(model.device) for k, v in inputs.items()}

with torch.inference_mode():
    outputs = model.generate(
        **inputs,
        max_new_tokens=1000,
    )

prompt_len = inputs["input_ids"].shape[1]
completion = tokenizer.decode(
    outputs[0][prompt_len:],
    skip_special_tokens=False,
)

function_call, content = parse_function_and_content(completion)
print(function_call, content)
Function calling via a local OpenAI-compatible server on port 8000 (bash):
curl http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
  "model": "ai-sage/GigaChat3.1-10B-A1.8B-bf16",
  "temperature": 0,
  "messages": [
    {
      "role": "user",
      "content": "Какая сейчас погода в Москве?"
    }
  ],
  "tools": [
    {
      "type": "function",
      "function": {
        "name": "get_weather",
        "description": "Получить информацию о текущей погоде в указанном городе.",
        "parameters": {
          "type": "object",
          "properties": {
            "city": {
              "type": "string",
              "description": "Название города (например, Москва, Казань)."
            }
          },
          "required": ["city"]
        }
      }
    }
  ]
}'

Deploy This Model

Production-ready deployment in minutes

Together.ai

Instant API access to this model

Fastest API

Production-ready inference API. Start free, scale to millions.

Try Free API

Replicate

One-click model deployment

Easiest Setup

Run models in the cloud with simple API. No DevOps required.

Deploy Now

Disclosure: We may earn a commission from these partners. This helps keep LLMYourWay free.