F2LLM-v2-160M
License: apache-2.0
By codefuse-ai
Embedding Model
Quick Summary
F2LLM-v2-160M is a 160M-parameter text embedding model from the F2LLM family of fully open embedding LLMs. It computes dense text embeddings for tasks such as information retrieval, semantic search, and text classification.
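The examples below need the sentence-transformers and transformers packages (encode_query/encode_document require a recent sentence-transformers release) plus PyTorch; a typical setup, with version pinning left to you:

pip install -U sentence-transformers transformers torch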
Code Examples
Usage (Sentence Transformers)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("codefuse-ai/F2LLM-v2-160M", device="cuda:0", model_kwargs={"torch_dtype": "bfloat16"})
# Some sample query and documents
query = "What is F2LLM used for?"
documents = [
'We present F2LLM, a family of fully open embedding LLMs that achieve a strong balance between model size, training data, and embedding performance.',
'F2LLM is a model for computing text embeddings that can be used for various NLP tasks such as information retrieval, semantic search, and text classification.',
'F2LLM 是 CodeFuse 开源的系列嵌入模型。',
'F2LLM — это модель вычисления встраивания текста, которую можно использовать для различных задач НЛП, таких как поиск информации, семантический поиск и классификация текста.'
]
# Encode the query and documents separately. The encode_query method uses the query prompt
query_embedding = model.encode_query(query)
document_embeddings = model.encode_document(documents)
print(query_embedding.shape, document_embeddings.shape)
# (640,) (4, 640)
# Compute cosine similarity between the query and documents
similarity = model.similarity(query_embedding, document_embeddings)
print(similarity)
# tensor([[0.6373, 0.7239, 0.6302, 0.7509]])
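For retrieval you usually want a ranking of the documents rather than raw scores, and the similarity tensor can be sorted directly. A minimal sketch (not part of the original card), reusing the similarity and documents variables from the example above:

scores = similarity[0]  # shape (4,): one cosine score per document
ranking = scores.argsort(descending=True)
for rank, idx in enumerate(ranking.tolist(), start=1):
    print(f"{rank}. {scores[idx].item():.4f}  {documents[idx][:60]}")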
Usage (Transformers)
from transformers import AutoModel, AutoTokenizer
import torch
import torch.nn.functional as F
model_path = "codefuse-ai/F2LLM-v2-160M"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModel.from_pretrained(model_path, torch_dtype=torch.bfloat16, device_map={'': 0})
query = "What is F2LLM used for?"
query_prompt = "Instruct: Given a question, retrieve passages that can help answer the question.\nQuery: "
documents = [
'We present F2LLM, a family of fully open embedding LLMs that achieve a strong balance between model size, training data, and embedding performance.',
'F2LLM is a model for computing text embeddings that can be used for various NLP tasks such as information retrieval, semantic search, and text classification.',
'F2LLM 是 CodeFuse 开源的系列嵌入模型。',
'F2LLM — это модель вычисления встраивания текста, которую можно использовать для различных задач НЛП, таких как поиск информации, семантический поиск и классификация текста.'
]
def encode(sentences):
    batch_size = len(sentences)
    # The tokenizer automatically appends the EOS token
    tokenized_inputs = tokenizer(sentences, padding=True, return_tensors='pt').to(model.device)
    last_hidden_state = model(**tokenized_inputs).last_hidden_state
    # Last-token pooling: take each sequence's hidden state at its EOS position
    eos_positions = tokenized_inputs.attention_mask.sum(dim=1) - 1
    embeddings = last_hidden_state[torch.arange(batch_size, device=model.device), eos_positions]
    # L2-normalize so dot products equal cosine similarities
    embeddings = F.normalize(embeddings, p=2, dim=1)
    return embeddings
# Encode the query and documents
query_embedding = encode([query_prompt + query])
document_embeddings = encode(documents)
print(query_embedding.shape, document_embeddings.shape)
# torch.Size([1, 640]) torch.Size([4, 640])
# Compute cosine similarity between the query and documents
similarity = query_embedding @ document_embeddings.T
print(similarity)
# tensor([[0.6367, 0.7227, 0.6328, 0.7500]], device='cuda:0',
#         dtype=torch.bfloat16, grad_fn=<MmBackward0>)
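For larger corpora it is worth batching and disabling gradient tracking; as the grad_fn in the output above shows, the encode helper tracks gradients by default. A minimal sketch, assuming the model, tokenizer, encode, and documents definitions from the example above (the batch size of 32 is an arbitrary choice):

corpus = documents * 100  # stand-in for a larger document collection
batches = []
with torch.no_grad():  # inference only: skip autograd bookkeeping
    for start in range(0, len(corpus), 32):
        batches.append(encode(corpus[start:start + 32]))
corpus_embeddings = torch.cat(batches, dim=0)
print(corpus_embeddings.shape)  # torch.Size([400, 640])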