kimi-k2.5-tiny-random
by yujiepan · 19 downloads · New · Early-stage
Vision-language model (tiny random-weights checkpoint)
Edge AI: Mobile · Laptop · Server (6GB+ RAM)
Quick Summary
A tiny, randomly initialized checkpoint of the Kimi K2.5 vision-language architecture: a MoonViT vision tower feeding a DeepseekV3-style mixture-of-experts language model. It is intended for pipeline testing and debugging rather than meaningful inference.
Device Compatibility
Mobile: 4-6GB RAM
Laptop: 16GB RAM
Server: GPU
Minimum recommended: 3GB+ RAM
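As a sanity check on figures like these, weight-only memory scales as parameter count × bytes per parameter for the load dtype, with activations and KV cache adding overhead on top. A minimal sketch of that arithmetic (the parameter count below is a placeholder for illustration, not a measured value for this checkpoint):

```python
def weight_memory_gib(n_params: int, bytes_per_param: float) -> float:
    """Weight-only memory in GiB; runtime adds activation and KV-cache overhead."""
    return n_params * bytes_per_param / 1024**3

# Placeholder count for illustration; substitute the real value,
# e.g. sum(p.numel() for p in model.parameters()).
n = 2_500_000_000
for dtype, width in [("fp32", 4), ("bf16", 2), ("int8", 1), ("int4", 0.5)]:
    print(f"{dtype}: {weight_memory_gib(n, width):.1f} GiB")
```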
Code Examples
Example usage with transformers:

```python
import base64
import requests
import torch
from transformers import AutoModel, AutoProcessor

model_id = "yujiepan/kimi-k2.5-tiny-random"

# Download a sample image and inline it as a base64 data URL.
image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"
image_base64 = base64.b64encode(requests.get(image_url).content).decode()

messages = [
    {
        'role': 'user',
        'content': [
            {'type': 'text', 'text': 'Describe this image in detail.'},
            {
                'type': 'image_url',
                'image_url': f'data:image/png;base64,{image_base64}',
            },
        ],
    }
]

processor = AutoProcessor.from_pretrained(
    model_id,
    trust_remote_code=True,
)
model = AutoModel.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="cuda",
    trust_remote_code=True,
)

inputs = processor(
    messages,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Remove token_type_ids if the processor added them; generate() does not use them.
inputs.pop("token_type_ids", None)

generated_ids = model.generate(**inputs, max_new_tokens=16)
# Decode only the newly generated tokens, skipping the prompt portion.
output_text = processor.decode(
    generated_ids[0][inputs["input_ids"].shape[1]:], skip_special_tokens=False)
print(output_text)
```

Since the checkpoint is randomly initialized, the output will be gibberish; the example verifies that the preprocessing and generation pipeline runs end to end.

Printing the model:

```text
KimiK25ForConditionalGeneration(
  (vision_tower): MoonViT3dPretrainedModel(
    (patch_embed): MoonVision3dPatchEmbed(
      (proj): Conv2d(3, 64, kernel_size=(14, 14), stride=(14, 14))
      (pos_emb): Learnable2DInterpPosEmbDivided_fixed()
    )
    (encoder): MoonViT3dEncoder(
      (rope_2d): Rope2DPosEmbRepeated(dim=32, max_height=512, max_width=512, theta_base=10000)
      (blocks): ModuleList(
        (0-1): 2 x MoonViTEncoderLayer(
          (norm0): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
          (norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
          (mlp): MLP2(
            (fc0): Linear(in_features=64, out_features=128, bias=True)
            (fc1): Linear(in_features=128, out_features=64, bias=True)
            (activation): PytorchGELUTanh()
          )
          (wqkv): Linear(in_features=64, out_features=192, bias=True)
          (wo): Linear(in_features=64, out_features=64, bias=True)
        )
      )
      (final_layernorm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
    )
  )
  (mm_projector): PatchMergerMLP(
    (pre_norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
    (proj): Sequential(
      (0): Linear(in_features=256, out_features=256, bias=True)
      (1): GELU(approximate='none')
      (2): Linear(in_features=256, out_features=8, bias=True)
    )
  )
  (language_model): DeepseekV3ForCausalLM(
    (model): DeepseekV3Model(
      (embed_tokens): Embedding(163840, 8, padding_idx=163839)
      (layers): ModuleList(
        (0): DeepseekV3DecoderLayer(
          (self_attn): DeepseekV3Attention(
            (q_a_proj): Linear(in_features=8, out_features=32, bias=False)
            (q_a_layernorm): DeepseekV3RMSNorm()
            (q_b_proj): Linear(in_features=32, out_features=256, bias=False)
            (kv_a_proj_with_mqa): Linear(in_features=8, out_features=576, bias=False)
            (kv_a_layernorm): DeepseekV3RMSNorm()
            (kv_b_proj): Linear(in_features=384, out_features=128, bias=False)
            (o_proj): Linear(in_features=64, out_features=8, bias=False)
            (rotary_emb): DeepseekV3YarnRotaryEmbedding()
          )
          (mlp): DeepseekV3MLP(
            (gate_proj): Linear(in_features=8, out_features=64, bias=False)
            (up_proj): Linear(in_features=8, out_features=64, bias=False)
            (down_proj): Linear(in_features=64, out_features=8, bias=False)
            (act_fn): SiLU()
          )
          (input_layernorm): DeepseekV3RMSNorm()
          (post_attention_layernorm): DeepseekV3RMSNorm()
        )
        (1): DeepseekV3DecoderLayer(
          (self_attn): DeepseekV3Attention(
            (q_a_proj): Linear(in_features=8, out_features=32, bias=False)
            (q_a_layernorm): DeepseekV3RMSNorm()
            (q_b_proj): Linear(in_features=32, out_features=256, bias=False)
            (kv_a_proj_with_mqa): Linear(in_features=8, out_features=576, bias=False)
            (kv_a_layernorm): DeepseekV3RMSNorm()
            (kv_b_proj): Linear(in_features=384, out_features=128, bias=False)
            (o_proj): Linear(in_features=64, out_features=8, bias=False)
            (rotary_emb): DeepseekV3YarnRotaryEmbedding()
          )
          (mlp): DeepseekV3MoE(
            (experts): ModuleList(
              (0-31): 32 x DeepseekV3MLP(
                (gate_proj): Linear(in_features=8, out_features=64, bias=False)
                (up_proj): Linear(in_features=8, out_features=64, bias=False)
                (down_proj): Linear(in_features=64, out_features=8, bias=False)
                (act_fn): SiLU()
              )
            )
            (gate): MoEGate()
            (shared_experts): DeepseekV3MLP(
              (gate_proj): Linear(in_features=8, out_features=64, bias=False)
              (up_proj): Linear(in_features=8, out_features=64, bias=False)
              (down_proj): Linear(in_features=64, out_features=8, bias=False)
              (act_fn): SiLU()
            )
          )
          (input_layernorm): DeepseekV3RMSNorm()
          (post_attention_layernorm): DeepseekV3RMSNorm()
        )
      )
      (norm): DeepseekV3RMSNorm()
    )
    (lm_head): Linear(in_features=8, out_features=163840, bias=False)
  )
)
```
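The tiny dimensions above (hidden size 8, two decoder layers, a 64-dim vision tower) mark this as a debug-scale checkpoint. A minimal sketch to count the parameters yourself, assuming `model` was loaded as in the usage example:

```python
# Count all parameters and estimate the bf16 weight footprint (2 bytes each).
n_params = sum(p.numel() for p in model.parameters())
print(f"parameters: {n_params:,}")
print(f"approx. bf16 weight memory: {n_params * 2 / 1024**2:.1f} MiB")
```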