gte-multilingual-base-Ko-embedding
5
license:apache-2.0
by
jaeyong2
Embedding Model
OTHER
New
5 downloads
Early-stage
Edge AI:
Mobile
Laptop
Server
Unknown
Mobile
Laptop
Server
Quick Summary
- H/W: Colab A100 40GB
- Data: jaeyong2/Ko-emb-PreView
- Accuracy (vs. Alibaba-NLP/gte-multilingual-base): 0. [value truncated in source]
Code Examples
Evaluation (Python / PyTorch)
"""Evaluation script for the Ko-embedding model card.

Measures retrieval accuracy on the jaeyong2/Ko-emb-PreView test split:
for each item, the model is "correct" when the embedding of the true
``Title`` is closer (cosine distance) to the ``context`` embedding than
the embedding of the ``Fake Title``.

NOTE(review): the original page pasted this identical snippet 17 times;
it is consolidated here once. It also relies on ``get_embedding``,
``model`` and ``tokenizer`` being defined earlier in the model card —
confirm those definitions before running.
"""
import numpy as np
import torch

try:
    from tqdm import tqdm
except ImportError:  # tqdm is cosmetic; fall back to a plain iterator
    def tqdm(iterable, **kwargs):
        return iterable


def _to_numpy(embedding):
    """Convert an embedding (torch tensor or array-like) to a float numpy array.

    Handles GPU / half-precision tensors via detach().cpu().float(), which is
    what the original snippet did inline at every call site.
    """
    if isinstance(embedding, torch.Tensor):
        return embedding.detach().cpu().float().numpy()
    return np.asarray(embedding, dtype=float)


def _cosine_distance(a, b):
    """Return the scalar cosine distance (1 - cosine similarity) of two vectors.

    Replaces sklearn.metrics.pairwise_distances, which returned a 1x1 matrix
    whose truth value was then tested implicitly — fragile and deprecated in
    newer numpy. This returns a plain float.
    """
    a = np.asarray(a, dtype=float).ravel()
    b = np.asarray(b, dtype=float).ravel()
    return 1.0 - float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))


def evaluate(validation_dataset, embed_fn=None):
    """Return the fraction of items whose true title out-ranks the fake title.

    Args:
        validation_dataset: iterable of dicts with "context", "Title" and
            "Fake Title" keys (e.g. a Hugging Face ``Dataset`` split).
        embed_fn: optional callable ``text -> embedding``. Defaults to the
            ``get_embedding(text, model, tokenizer)`` globals used by the
            original snippet, so existing ``evaluate(ds)`` calls still work.

    Returns:
        Accuracy in [0.0, 1.0]; 0.0 for an empty dataset (the original
        raised ZeroDivisionError in that case).
    """
    if embed_fn is None:
        # Late-bound lookup keeps the original behavior: the globals are
        # resolved only when evaluate() is actually called without embed_fn.
        embed_fn = lambda text: get_embedding(text, model, tokenizer)

    total = len(validation_dataset)
    if total == 0:
        return 0.0

    correct_count = 0
    for item in tqdm(validation_dataset):
        query = _to_numpy(embed_fn(item["context"]))
        positive = _to_numpy(embed_fn(item["Title"]))
        negative = _to_numpy(embed_fn(item["Fake Title"]))
        # Correct when the true title is nearer to the query than the fake one.
        if _cosine_distance(query, positive) < _cosine_distance(query, negative):
            correct_count += 1
    return correct_count / total


if __name__ == "__main__":
    # Local import: the original snippet used `datasets` without importing it.
    import datasets

    dataset = datasets.load_dataset("jaeyong2/Ko-emb-PreView")
    validation_dataset = dataset["test"].select(range(1000))
    model.eval()  # NOTE(review): requires `model`/`tokenizer`/`get_embedding` from earlier in the card
    results = evaluate(validation_dataset)
    print(f"Validation Results: {results}")
Production-ready deployment in minutes
Together.ai
Instant API access to this model
Production-ready inference API. Start free, scale to millions.
Try Free APIReplicate
One-click model deployment
Run models in the cloud with simple API. No DevOps required.
Deploy NowDisclosure: We may earn a commission from these partners. This helps keep LLMYourWay free.