SweRankEmbed-Large
12
1
license:cc-by-nc-4.0
by
Salesforce
Code Model
OTHER
Paper: arXiv:2505.07849 (parameter count not given)
New
12 downloads
Early-stage
Edge AI:
Mobile
Laptop
Server
5600GB+ RAM
Mobile
Laptop
Server
Quick Summary
Code embedding model from Salesforce, loaded through sentence-transformers, that scores code snippets against natural-language queries.
Device Compatibility
Mobile
4-6GB RAM
Laptop
16GB RAM
Server
GPU
Minimum Recommended
2334GB+ RAM
Code Examples
Requirements (Python)
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
Requirements (python)
# Example: rank code snippets against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded without an explicit prompt name.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# scores[i][j] is the dot product of query i's and document j's embeddings.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Sort this query's documents from highest to lowest score.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Example: rank code snippets against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded without an explicit prompt name.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# scores[i][j] is the dot product of query i's and document j's embeddings.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Sort this query's documents from highest to lowest score.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Example: rank code snippets against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded without an explicit prompt name.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# scores[i][j] is the dot product of query i's and document j's embeddings.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Sort this query's documents from highest to lowest score.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Example: rank code snippets against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded without an explicit prompt name.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# scores[i][j] is the dot product of query i's and document j's embeddings.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Sort this query's documents from highest to lowest score.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Example: rank code snippets against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded without an explicit prompt name.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# scores[i][j] is the dot product of query i's and document j's embeddings.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Sort this query's documents from highest to lowest score.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Example: rank code snippets against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded without an explicit prompt name.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# scores[i][j] is the dot product of query i's and document j's embeddings.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Sort this query's documents from highest to lowest score.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Example: rank code snippets against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded without an explicit prompt name.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# scores[i][j] is the dot product of query i's and document j's embeddings.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Sort this query's documents from highest to lowest score.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Example: rank code snippets against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded without an explicit prompt name.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# scores[i][j] is the dot product of query i's and document j's embeddings.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Sort this query's documents from highest to lowest score.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Example: rank code snippets against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded without an explicit prompt name.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# scores[i][j] is the dot product of query i's and document j's embeddings.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Sort this query's documents from highest to lowest score.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Example: rank code snippets against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded without an explicit prompt name.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# scores[i][j] is the dot product of query i's and document j's embeddings.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Sort this query's documents from highest to lowest score.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Example: rank code snippets against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded without an explicit prompt name.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# scores[i][j] is the dot product of query i's and document j's embeddings.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Sort this query's documents from highest to lowest score.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Example: rank code snippets against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded without an explicit prompt name.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# scores[i][j] is the dot product of query i's and document j's embeddings.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Sort this query's documents from highest to lowest score.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Example: rank code snippets against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded without an explicit prompt name.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# scores[i][j] is the dot product of query i's and document j's embeddings.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Sort this query's documents from highest to lowest score.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Example: rank code snippets against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded without an explicit prompt name.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# scores[i][j] is the dot product of query i's and document j's embeddings.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Sort this query's documents from highest to lowest score.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Example: rank code snippets against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded without an explicit prompt name.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# scores[i][j] is the dot product of query i's and document j's embeddings.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Sort this query's documents from highest to lowest score.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Example: rank code snippets against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded without an explicit prompt name.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# scores[i][j] is the dot product of query i's and document j's embeddings.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Sort this query's documents from highest to lowest score.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Example: rank code snippets against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded without an explicit prompt name.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# scores[i][j] is the dot product of query i's and document j's embeddings.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Sort this query's documents from highest to lowest score.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Example: rank code snippets against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded without an explicit prompt name.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# scores[i][j] is the dot product of query i's and document j's embeddings.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Sort this query's documents from highest to lowest score.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Example: rank code snippets against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded without an explicit prompt name.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# scores[i][j] is the dot product of query i's and document j's embeddings.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Sort this query's documents from highest to lowest score.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Example: rank code snippets against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded without an explicit prompt name.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# scores[i][j] is the dot product of query i's and document j's embeddings.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Sort this query's documents from highest to lowest score.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Example: rank code snippets against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded without an explicit prompt name.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# scores[i][j] is the dot product of query i's and document j's embeddings.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Sort this query's documents from highest to lowest score.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Example: rank code snippets against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded without an explicit prompt name.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# scores[i][j] is the dot product of query i's and document j's embeddings.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Sort this query's documents from highest to lowest score.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Example: rank code snippets against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded without an explicit prompt name.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# scores[i][j] is the dot product of query i's and document j's embeddings.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Sort this query's documents from highest to lowest score.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Example: rank code snippets against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded without an explicit prompt name.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# scores[i][j] is the dot product of query i's and document j's embeddings.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Sort this query's documents from highest to lowest score.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Example: rank code snippets against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded without an explicit prompt name.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# scores[i][j] is the dot product of query i's and document j's embeddings.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Sort this query's documents from highest to lowest score.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Example: rank code snippets against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded without an explicit prompt name.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# scores[i][j] is the dot product of query i's and document j's embeddings.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Sort this query's documents from highest to lowest score.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Example: rank code snippets against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded without an explicit prompt name.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# scores[i][j] is the dot product of query i's and document j's embeddings.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Sort this query's documents from highest to lowest score.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Example: rank code snippets against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded without an explicit prompt name.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# scores[i][j] is the dot product of query i's and document j's embeddings.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Sort this query's documents from highest to lowest score.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Example: rank code snippets against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded without an explicit prompt name.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# scores[i][j] is the dot product of query i's and document j's embeddings.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Sort this query's documents from highest to lowest score.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Example: rank code snippets against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded without an explicit prompt name.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# scores[i][j] is the dot product of query i's and document j's embeddings.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Sort this query's documents from highest to lowest score.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Example: rank code snippets against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded without an explicit prompt name.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# scores[i][j] is the dot product of query i's and document j's embeddings.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Sort this query's documents from highest to lowest score.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Example: rank code snippets against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded without an explicit prompt name.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# scores[i][j] is the dot product of query i's and document j's embeddings.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Sort this query's documents from highest to lowest score.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Example: rank code snippets against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded without an explicit prompt name.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# scores[i][j] is the dot product of query i's and document j's embeddings.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Sort this query's documents from highest to lowest score.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Example: rank code snippets against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded without an explicit prompt name.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# scores[i][j] is the dot product of query i's and document j's embeddings.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Sort this query's documents from highest to lowest score.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Example: rank code snippets against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded without an explicit prompt name.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# scores[i][j] is the dot product of query i's and document j's embeddings.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Sort this query's documents from highest to lowest score.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Example: rank code snippets against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded without an explicit prompt name.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# scores[i][j] is the dot product of query i's and document j's embeddings.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Sort this query's documents from highest to lowest score.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Example: rank code snippets against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded without an explicit prompt name.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# scores[i][j] is the dot product of query i's and document j's embeddings.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Sort this query's documents from highest to lowest score.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Example: rank code snippets against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded without an explicit prompt name.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# scores[i][j] is the dot product of query i's and document j's embeddings.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Sort this query's documents from highest to lowest score.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
from from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192
queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
doc_score_pairs = list(zip(documents, query_scores))
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
# Output passages & scores
print("Query:", query)
for document, score in doc_score_pairs:
print(score, document)Requirementspython
# Rank code documents against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents use the default encoding.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Each row of `scores` is paired with a query, each entry in a row with a document.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Requirements (python)
# Rank code documents against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents use the default encoding.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Each row of `scores` is paired with a query, each entry in a row with a document.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Requirements (python)
# Rank code documents against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents use the default encoding.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Each row of `scores` is paired with a query, each entry in a row with a document.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Requirements (python)
# Rank code documents against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents use the default encoding.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Each row of `scores` is paired with a query, each entry in a row with a document.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Requirements (python)
# Rank code documents against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents use the default encoding.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Each row of `scores` is paired with a query, each entry in a row with a document.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Requirements (python)
# Rank code documents against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents use the default encoding.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Each row of `scores` is paired with a query, each entry in a row with a document.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Requirements (python)
# Rank code documents against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents use the default encoding.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Each row of `scores` is paired with a query, each entry in a row with a document.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Requirements (python)
# Rank code documents against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents use the default encoding.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Each row of `scores` is paired with a query, each entry in a row with a document.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Requirements (python)
# Rank code documents against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents use the default encoding.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Each row of `scores` is paired with a query, each entry in a row with a document.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Requirements (python)
# Rank code documents against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents use the default encoding.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Each row of `scores` is paired with a query, each entry in a row with a document.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Requirements (python)
# Rank code documents against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents use the default encoding.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Each row of `scores` is paired with a query, each entry in a row with a document.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Requirements (python)
# Rank code documents against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents use the default encoding.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Each row of `scores` is paired with a query, each entry in a row with a document.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Requirements (python)
# Rank code documents against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents use the default encoding.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Each row of `scores` is paired with a query, each entry in a row with a document.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Requirements (python)
# Rank code documents against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents use the default encoding.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Each row of `scores` is paired with a query, each entry in a row with a document.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Requirements (python)
# Rank code documents against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents use the default encoding.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Each row of `scores` is paired with a query, each entry in a row with a document.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Requirements (python)
# Rank code documents against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents use the default encoding.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Each row of `scores` is paired with a query, each entry in a row with a document.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Requirements (python)
# Rank code documents against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents use the default encoding.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Each row of `scores` is paired with a query, each entry in a row with a document.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Requirements (python)
# Rank code documents against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents use the default encoding.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Each row of `scores` is paired with a query, each entry in a row with a document.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Requirements (python)
# Rank code documents against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents use the default encoding.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Each row of `scores` is paired with a query, each entry in a row with a document.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Requirements (python)
# Rank code documents against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents use the default encoding.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Each row of `scores` is paired with a query, each entry in a row with a document.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Requirements (python)
# Rank code documents against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents use the default encoding.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Each row of `scores` is paired with a query, each entry in a row with a document.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Requirements (python)
# Rank code documents against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents use the default encoding.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Each row of `scores` is paired with a query, each entry in a row with a document.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Requirements (python)
# Rank code documents against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents use the default encoding.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Each row of `scores` is paired with a query, each entry in a row with a document.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Requirements (python)
# Rank code documents against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents use the default encoding.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Each row of `scores` is paired with a query, each entry in a row with a document.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Requirements (python)
# Rank code documents against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents use the default encoding.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Each row of `scores` is paired with a query, each entry in a row with a document.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Requirements (python)
# Rank code documents against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents use the default encoding.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Each row of `scores` is paired with a query, each entry in a row with a document.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Requirements (python)
# Rank code documents against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents use the default encoding.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Each row of `scores` is paired with a query, each entry in a row with a document.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Requirements (python)
# Rank code documents against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents use the default encoding.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Each row of `scores` is paired with a query, each entry in a row with a document.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Requirements (python)
# Rank code documents against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents use the default encoding.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Each row of `scores` is paired with a query, each entry in a row with a document.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Requirements (python)
# Rank code documents against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents use the default encoding.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Each row of `scores` is paired with a query, each entry in a row with a document.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Requirements (python)
# Rank code documents against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents use the default encoding.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Each row of `scores` is paired with a query, each entry in a row with a document.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Requirements (python)
# Rank code documents against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents use the default encoding.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Each row of `scores` is paired with a query, each entry in a row with a document.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Requirements (python)
# Rank code documents against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents use the default encoding.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Each row of `scores` is paired with a query, each entry in a row with a document.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Requirements (python)
# Rank code documents against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents use the default encoding.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Each row of `scores` is paired with a query, each entry in a row with a document.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Requirements (python)
# Rank code documents against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents use the default encoding.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Each row of `scores` is paired with a query, each entry in a row with a document.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Requirements (python)
# Rank code documents against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents use the default encoding.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Each row of `scores` is paired with a query, each entry in a row with a document.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Requirements (python)
# Rank code documents against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents use the default encoding.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Each row of `scores` is paired with a query, each entry in a row with a document.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Requirements (python)
# Rank code documents against a natural-language query with SweRankEmbed-Large.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents use the default encoding.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Each row of `scores` is paired with a query, each entry in a row with a document.
scores = query_embeddings @ document_embeddings.T
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Requirements (python)
# Rank code documents against a natural-language query using
# sentence-transformers with the SweRankEmbed-Large model.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded as-is.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# One row of scores per query, one column per document.
scores = query_embeddings @ document_embeddings.T

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Rank code documents against a natural-language query using
# sentence-transformers with the SweRankEmbed-Large model.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded as-is.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# One row of scores per query, one column per document.
scores = query_embeddings @ document_embeddings.T

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Rank code documents against a natural-language query using
# sentence-transformers with the SweRankEmbed-Large model.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded as-is.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# One row of scores per query, one column per document.
scores = query_embeddings @ document_embeddings.T

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Rank code documents against a natural-language query using
# sentence-transformers with the SweRankEmbed-Large model.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded as-is.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# One row of scores per query, one column per document.
scores = query_embeddings @ document_embeddings.T

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Rank code documents against a natural-language query using
# sentence-transformers with the SweRankEmbed-Large model.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded as-is.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# One row of scores per query, one column per document.
scores = query_embeddings @ document_embeddings.T

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Rank code documents against a natural-language query using
# sentence-transformers with the SweRankEmbed-Large model.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded as-is.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# One row of scores per query, one column per document.
scores = query_embeddings @ document_embeddings.T

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Rank code documents against a natural-language query using
# sentence-transformers with the SweRankEmbed-Large model.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded as-is.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# One row of scores per query, one column per document.
scores = query_embeddings @ document_embeddings.T

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Requirements (python)
# Rank code documents against a natural-language query using
# sentence-transformers with the SweRankEmbed-Large model.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Salesforce/SweRankEmbed-Large", trust_remote_code=True)
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

queries = ['Calculate the n-th factorial']
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']

# Queries are encoded with the "query" prompt; documents are encoded as-is.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# One row of scores per query, one column per document.
scores = query_embeddings @ document_embeddings.T

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pick, for every sequence in the batch, the hidden state of its last attended token.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            ``[batch, position, ...]`` (presumably ``(batch, seq_len, hidden)``).
        attention_mask: Mask indexed as ``[batch, position]``; assumed to hold
            1 for real tokens and 0 for padding.

    Returns:
        One hidden-state vector per sequence.
    """
    # If every sequence has its final position attended, the batch is
    # left-padded (or not padded at all) and the last position is the answer.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        # Right padding: the last real token sits at index (mask sum - 1).
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt: an ``Instruct:`` line with the task, then a ``Query:`` line with the query."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Dot product of L2-normalized vectors: one row per query, one column per document.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pick, for every sequence in the batch, the hidden state of its last attended token.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            ``[batch, position, ...]`` (presumably ``(batch, seq_len, hidden)``).
        attention_mask: Mask indexed as ``[batch, position]``; assumed to hold
            1 for real tokens and 0 for padding.

    Returns:
        One hidden-state vector per sequence.
    """
    # If every sequence has its final position attended, the batch is
    # left-padded (or not padded at all) and the last position is the answer.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        # Right padding: the last real token sits at index (mask sum - 1).
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt: an ``Instruct:`` line with the task, then a ``Query:`` line with the query."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Dot product of L2-normalized vectors: one row per query, one column per document.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pick, for every sequence in the batch, the hidden state of its last attended token.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            ``[batch, position, ...]`` (presumably ``(batch, seq_len, hidden)``).
        attention_mask: Mask indexed as ``[batch, position]``; assumed to hold
            1 for real tokens and 0 for padding.

    Returns:
        One hidden-state vector per sequence.
    """
    # If every sequence has its final position attended, the batch is
    # left-padded (or not padded at all) and the last position is the answer.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        # Right padding: the last real token sits at index (mask sum - 1).
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt: an ``Instruct:`` line with the task, then a ``Query:`` line with the query."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Dot product of L2-normalized vectors: one row per query, one column per document.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pick, for every sequence in the batch, the hidden state of its last attended token.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            ``[batch, position, ...]`` (presumably ``(batch, seq_len, hidden)``).
        attention_mask: Mask indexed as ``[batch, position]``; assumed to hold
            1 for real tokens and 0 for padding.

    Returns:
        One hidden-state vector per sequence.
    """
    # If every sequence has its final position attended, the batch is
    # left-padded (or not padded at all) and the last position is the answer.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        # Right padding: the last real token sits at index (mask sum - 1).
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt: an ``Instruct:`` line with the task, then a ``Query:`` line with the query."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Dot product of L2-normalized vectors: one row per query, one column per document.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pick, for every sequence in the batch, the hidden state of its last attended token.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            ``[batch, position, ...]`` (presumably ``(batch, seq_len, hidden)``).
        attention_mask: Mask indexed as ``[batch, position]``; assumed to hold
            1 for real tokens and 0 for padding.

    Returns:
        One hidden-state vector per sequence.
    """
    # If every sequence has its final position attended, the batch is
    # left-padded (or not padded at all) and the last position is the answer.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        # Right padding: the last real token sits at index (mask sum - 1).
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt: an ``Instruct:`` line with the task, then a ``Query:`` line with the query."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Dot product of L2-normalized vectors: one row per query, one column per document.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pick, for every sequence in the batch, the hidden state of its last attended token.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            ``[batch, position, ...]`` (presumably ``(batch, seq_len, hidden)``).
        attention_mask: Mask indexed as ``[batch, position]``; assumed to hold
            1 for real tokens and 0 for padding.

    Returns:
        One hidden-state vector per sequence.
    """
    # If every sequence has its final position attended, the batch is
    # left-padded (or not padded at all) and the last position is the answer.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        # Right padding: the last real token sits at index (mask sum - 1).
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt: an ``Instruct:`` line with the task, then a ``Query:`` line with the query."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Dot product of L2-normalized vectors: one row per query, one column per document.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pick, for every sequence in the batch, the hidden state of its last attended token.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            ``[batch, position, ...]`` (presumably ``(batch, seq_len, hidden)``).
        attention_mask: Mask indexed as ``[batch, position]``; assumed to hold
            1 for real tokens and 0 for padding.

    Returns:
        One hidden-state vector per sequence.
    """
    # If every sequence has its final position attended, the batch is
    # left-padded (or not padded at all) and the last position is the answer.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        # Right padding: the last real token sits at index (mask sum - 1).
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt: an ``Instruct:`` line with the task, then a ``Query:`` line with the query."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Dot product of L2-normalized vectors: one row per query, one column per document.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pick, for every sequence in the batch, the hidden state of its last attended token.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            ``[batch, position, ...]`` (presumably ``(batch, seq_len, hidden)``).
        attention_mask: Mask indexed as ``[batch, position]``; assumed to hold
            1 for real tokens and 0 for padding.

    Returns:
        One hidden-state vector per sequence.
    """
    # If every sequence has its final position attended, the batch is
    # left-padded (or not padded at all) and the last position is the answer.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        # Right padding: the last real token sits at index (mask sum - 1).
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt: an ``Instruct:`` line with the task, then a ``Query:`` line with the query."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Dot product of L2-normalized vectors: one row per query, one column per document.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pick, for every sequence in the batch, the hidden state of its last attended token.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            ``[batch, position, ...]`` (presumably ``(batch, seq_len, hidden)``).
        attention_mask: Mask indexed as ``[batch, position]``; assumed to hold
            1 for real tokens and 0 for padding.

    Returns:
        One hidden-state vector per sequence.
    """
    # If every sequence has its final position attended, the batch is
    # left-padded (or not padded at all) and the last position is the answer.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        # Right padding: the last real token sits at index (mask sum - 1).
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt: an ``Instruct:`` line with the task, then a ``Query:`` line with the query."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Dot product of L2-normalized vectors: one row per query, one column per document.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pick, for every sequence in the batch, the hidden state of its last attended token.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            ``[batch, position, ...]`` (presumably ``(batch, seq_len, hidden)``).
        attention_mask: Mask indexed as ``[batch, position]``; assumed to hold
            1 for real tokens and 0 for padding.

    Returns:
        One hidden-state vector per sequence.
    """
    # If every sequence has its final position attended, the batch is
    # left-padded (or not padded at all) and the last position is the answer.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        # Right padding: the last real token sits at index (mask sum - 1).
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt: an ``Instruct:`` line with the task, then a ``Query:`` line with the query."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Dot product of L2-normalized vectors: one row per query, one column per document.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pick, for every sequence in the batch, the hidden state of its last attended token.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            ``[batch, position, ...]`` (presumably ``(batch, seq_len, hidden)``).
        attention_mask: Mask indexed as ``[batch, position]``; assumed to hold
            1 for real tokens and 0 for padding.

    Returns:
        One hidden-state vector per sequence.
    """
    # If every sequence has its final position attended, the batch is
    # left-padded (or not padded at all) and the last position is the answer.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        # Right padding: the last real token sits at index (mask sum - 1).
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in ``Instruct:``/``Query:`` form."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings (L2, so the matrix product below is a cosine similarity)
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page text that was fused onto the line above: "Each query must come with a
# one-sentence instruction that describes the task" / "python" / "transformers"
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at each sequence's last attended position.

    Both tensors are indexed as ``[batch, position, ...]``. If the final
    column of ``attention_mask`` sums to the batch size (every row set,
    assuming a 0/1 mask) the batch is treated as left-padded and the final
    position is used for all rows; otherwise each row is indexed at
    ``attention_mask.sum(dim=1) - 1``.
    """
    # Left padding: the last column of the mask is set for every row.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: pick each row's own last attended position.
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in ``Instruct:``/``Query:`` form."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings (L2, so the matrix product below is a cosine similarity)
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page text that was fused onto the line above: "Each query must come with a
# one-sentence instruction that describes the task" / "python" / "transformers"
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at each sequence's last attended position.

    Both tensors are indexed as ``[batch, position, ...]``. If the final
    column of ``attention_mask`` sums to the batch size (every row set,
    assuming a 0/1 mask) the batch is treated as left-padded and the final
    position is used for all rows; otherwise each row is indexed at
    ``attention_mask.sum(dim=1) - 1``.
    """
    # Left padding: the last column of the mask is set for every row.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: pick each row's own last attended position.
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in ``Instruct:``/``Query:`` form."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings (L2, so the matrix product below is a cosine similarity)
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page text that was fused onto the line above: "Each query must come with a
# one-sentence instruction that describes the task" / "python" / "transformers"
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at each sequence's last attended position.

    Both tensors are indexed as ``[batch, position, ...]``. If the final
    column of ``attention_mask`` sums to the batch size (every row set,
    assuming a 0/1 mask) the batch is treated as left-padded and the final
    position is used for all rows; otherwise each row is indexed at
    ``attention_mask.sum(dim=1) - 1``.
    """
    # Left padding: the last column of the mask is set for every row.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: pick each row's own last attended position.
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in ``Instruct:``/``Query:`` form."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings (L2, so the matrix product below is a cosine similarity)
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page text that was fused onto the line above: "Each query must come with a
# one-sentence instruction that describes the task" / "python" / "transformers"
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at each sequence's last attended position.

    Both tensors are indexed as ``[batch, position, ...]``. If the final
    column of ``attention_mask`` sums to the batch size (every row set,
    assuming a 0/1 mask) the batch is treated as left-padded and the final
    position is used for all rows; otherwise each row is indexed at
    ``attention_mask.sum(dim=1) - 1``.
    """
    # Left padding: the last column of the mask is set for every row.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: pick each row's own last attended position.
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in ``Instruct:``/``Query:`` form."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings (L2, so the matrix product below is a cosine similarity)
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page text that was fused onto the line above: "Each query must come with a
# one-sentence instruction that describes the task" / "python" / "transformers"
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at each sequence's last attended position.

    Both tensors are indexed as ``[batch, position, ...]``. If the final
    column of ``attention_mask`` sums to the batch size (every row set,
    assuming a 0/1 mask) the batch is treated as left-padded and the final
    position is used for all rows; otherwise each row is indexed at
    ``attention_mask.sum(dim=1) - 1``.
    """
    # Left padding: the last column of the mask is set for every row.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: pick each row's own last attended position.
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in ``Instruct:``/``Query:`` form."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings (L2, so the matrix product below is a cosine similarity)
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page text that was fused onto the line above: "Each query must come with a
# one-sentence instruction that describes the task" / "python" / "transformers"
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at each sequence's last attended position.

    Both tensors are indexed as ``[batch, position, ...]``. If the final
    column of ``attention_mask`` sums to the batch size (every row set,
    assuming a 0/1 mask) the batch is treated as left-padded and the final
    position is used for all rows; otherwise each row is indexed at
    ``attention_mask.sum(dim=1) - 1``.
    """
    # Left padding: the last column of the mask is set for every row.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: pick each row's own last attended position.
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in ``Instruct:``/``Query:`` form."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings (L2, so the matrix product below is a cosine similarity)
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page text that was fused onto the line above: "Each query must come with a
# one-sentence instruction that describes the task" / "python" / "transformers"
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at each sequence's last attended position.

    Both tensors are indexed as ``[batch, position, ...]``. If the final
    column of ``attention_mask`` sums to the batch size (every row set,
    assuming a 0/1 mask) the batch is treated as left-padded and the final
    position is used for all rows; otherwise each row is indexed at
    ``attention_mask.sum(dim=1) - 1``.
    """
    # Left padding: the last column of the mask is set for every row.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: pick each row's own last attended position.
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in ``Instruct:``/``Query:`` form."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings (L2, so the matrix product below is a cosine similarity)
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page text that was fused onto the line above: "Each query must come with a
# one-sentence instruction that describes the task" / "python" / "transformers"
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at each sequence's last attended position.

    Both tensors are indexed as ``[batch, position, ...]``. If the final
    column of ``attention_mask`` sums to the batch size (every row set,
    assuming a 0/1 mask) the batch is treated as left-padded and the final
    position is used for all rows; otherwise each row is indexed at
    ``attention_mask.sum(dim=1) - 1``.
    """
    # Left padding: the last column of the mask is set for every row.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: pick each row's own last attended position.
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in ``Instruct:``/``Query:`` form."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings (L2, so the matrix product below is a cosine similarity)
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page text that was fused onto the line above: "Each query must come with a
# one-sentence instruction that describes the task" / "python" / "transformers"
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at each sequence's last attended position.

    Both tensors are indexed as ``[batch, position, ...]``. If the final
    column of ``attention_mask`` sums to the batch size (every row set,
    assuming a 0/1 mask) the batch is treated as left-padded and the final
    position is used for all rows; otherwise each row is indexed at
    ``attention_mask.sum(dim=1) - 1``.
    """
    # Left padding: the last column of the mask is set for every row.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: pick each row's own last attended position.
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in ``Instruct:``/``Query:`` form."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings (L2, so the matrix product below is a cosine similarity)
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page text that was fused onto the line above: "Each query must come with a
# one-sentence instruction that describes the task" / "python" / "transformers"
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at each sequence's last attended position.

    Both tensors are indexed as ``[batch, position, ...]``. If the final
    column of ``attention_mask`` sums to the batch size (every row set,
    assuming a 0/1 mask) the batch is treated as left-padded and the final
    position is used for all rows; otherwise each row is indexed at
    ``attention_mask.sum(dim=1) - 1``.
    """
    # Left padding: the last column of the mask is set for every row.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: pick each row's own last attended position.
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in ``Instruct:``/``Query:`` form."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings (L2, so the matrix product below is a cosine similarity)
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page text that was fused onto the line above: "Each query must come with a
# one-sentence instruction that describes the task" / "python" / "transformers"
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at each sequence's last attended position.

    Both tensors are indexed as ``[batch, position, ...]``. If the final
    column of ``attention_mask`` sums to the batch size (every row set,
    assuming a 0/1 mask) the batch is treated as left-padded and the final
    position is used for all rows; otherwise each row is indexed at
    ``attention_mask.sum(dim=1) - 1``.
    """
    # Left padding: the last column of the mask is set for every row.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: pick each row's own last attended position.
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in ``Instruct:``/``Query:`` form."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings (L2, so the matrix product below is a cosine similarity)
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page text that was fused onto the line above: "Each query must come with a
# one-sentence instruction that describes the task" / "python" / "transformers"
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at each sequence's last attended position.

    Both tensors are indexed as ``[batch, position, ...]``. If the final
    column of ``attention_mask`` sums to the batch size (every row set,
    assuming a 0/1 mask) the batch is treated as left-padded and the final
    position is used for all rows; otherwise each row is indexed at
    ``attention_mask.sum(dim=1) - 1``.
    """
    # Left padding: the last column of the mask is set for every row.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: pick each row's own last attended position.
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in ``Instruct:``/``Query:`` form."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings (L2, so the matrix product below is a cosine similarity)
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    # Highest-scoring documents first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page text that was fused onto the line above: "Each query must come with a
# one-sentence instruction that describes the task" / "python" / "transformers"
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at each sequence's last attended position.

    Both tensors are indexed as ``[batch, position, ...]``. If the final
    column of ``attention_mask`` sums to the batch size (every row set,
    assuming a 0/1 mask) the batch is treated as left-padded and the final
    position is used for all rows; otherwise each row is indexed at
    ``attention_mask.sum(dim=1) - 1``.
    """
    # Left padding: the last column of the mask is set for every row.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: pick each row's own last attended position.
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the instruction-prefixed query string.

    Args:
        task_description: Text placed after ``Instruct: `` on the first line.
        query: Text placed after ``Query: `` on the second line.

    Returns:
        The two labelled lines joined by a single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings so the matrix product below is a cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    # Rank documents for this query, highest score first
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda pair: pair[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Scraped example caption: "Each query must come with a one-sentence instruction that describes the task" (tags: python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last attended token of each sequence.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...].
        attention_mask: Mask indexed as [batch, position]; its row sums are
            used as token counts (assumes 1 = real token, 0 = padding).

    Returns:
        One hidden-state entry per batch row.
    """
    # Every row attends to its final position: the batch is left-padded (or
    # unpadded), so the final position already holds the last real token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token sits at (attended token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the instruction-prefixed query string.

    Args:
        task_description: Text placed after ``Instruct: `` on the first line.
        query: Text placed after ``Query: `` on the second line.

    Returns:
        The two labelled lines joined by a single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings so the matrix product below is a cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    # Rank documents for this query, highest score first
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda pair: pair[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Scraped example caption: "Each query must come with a one-sentence instruction that describes the task" (tags: python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last attended token of each sequence.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...].
        attention_mask: Mask indexed as [batch, position]; its row sums are
            used as token counts (assumes 1 = real token, 0 = padding).

    Returns:
        One hidden-state entry per batch row.
    """
    # Every row attends to its final position: the batch is left-padded (or
    # unpadded), so the final position already holds the last real token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token sits at (attended token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the instruction-prefixed query string.

    Args:
        task_description: Text placed after ``Instruct: `` on the first line.
        query: Text placed after ``Query: `` on the second line.

    Returns:
        The two labelled lines joined by a single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings so the matrix product below is a cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    # Rank documents for this query, highest score first
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda pair: pair[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Scraped example caption: "Each query must come with a one-sentence instruction that describes the task" (tags: python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last attended token of each sequence.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...].
        attention_mask: Mask indexed as [batch, position]; its row sums are
            used as token counts (assumes 1 = real token, 0 = padding).

    Returns:
        One hidden-state entry per batch row.
    """
    # Every row attends to its final position: the batch is left-padded (or
    # unpadded), so the final position already holds the last real token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token sits at (attended token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the instruction-prefixed query string.

    Args:
        task_description: Text placed after ``Instruct: `` on the first line.
        query: Text placed after ``Query: `` on the second line.

    Returns:
        The two labelled lines joined by a single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings so the matrix product below is a cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    # Rank documents for this query, highest score first
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda pair: pair[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Scraped example caption: "Each query must come with a one-sentence instruction that describes the task" (tags: python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last attended token of each sequence.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...].
        attention_mask: Mask indexed as [batch, position]; its row sums are
            used as token counts (assumes 1 = real token, 0 = padding).

    Returns:
        One hidden-state entry per batch row.
    """
    # Every row attends to its final position: the batch is left-padded (or
    # unpadded), so the final position already holds the last real token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token sits at (attended token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the instruction-prefixed query string.

    Args:
        task_description: Text placed after ``Instruct: `` on the first line.
        query: Text placed after ``Query: `` on the second line.

    Returns:
        The two labelled lines joined by a single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings so the matrix product below is a cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    # Rank documents for this query, highest score first
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda pair: pair[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Scraped example caption: "Each query must come with a one-sentence instruction that describes the task" (tags: python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last attended token of each sequence.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...].
        attention_mask: Mask indexed as [batch, position]; its row sums are
            used as token counts (assumes 1 = real token, 0 = padding).

    Returns:
        One hidden-state entry per batch row.
    """
    # Every row attends to its final position: the batch is left-padded (or
    # unpadded), so the final position already holds the last real token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token sits at (attended token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the instruction-prefixed query string.

    Args:
        task_description: Text placed after ``Instruct: `` on the first line.
        query: Text placed after ``Query: `` on the second line.

    Returns:
        The two labelled lines joined by a single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings so the matrix product below is a cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    # Rank documents for this query, highest score first
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda pair: pair[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Scraped example caption: "Each query must come with a one-sentence instruction that describes the task" (tags: python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last attended token of each sequence.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...].
        attention_mask: Mask indexed as [batch, position]; its row sums are
            used as token counts (assumes 1 = real token, 0 = padding).

    Returns:
        One hidden-state entry per batch row.
    """
    # Every row attends to its final position: the batch is left-padded (or
    # unpadded), so the final position already holds the last real token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token sits at (attended token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the instruction-prefixed query string.

    Args:
        task_description: Text placed after ``Instruct: `` on the first line.
        query: Text placed after ``Query: `` on the second line.

    Returns:
        The two labelled lines joined by a single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings so the matrix product below is a cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    # Rank documents for this query, highest score first
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda pair: pair[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Scraped example caption: "Each query must come with a one-sentence instruction that describes the task" (tags: python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last attended token of each sequence.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...].
        attention_mask: Mask indexed as [batch, position]; its row sums are
            used as token counts (assumes 1 = real token, 0 = padding).

    Returns:
        One hidden-state entry per batch row.
    """
    # Every row attends to its final position: the batch is left-padded (or
    # unpadded), so the final position already holds the last real token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token sits at (attended token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the instruction-prefixed query string.

    Args:
        task_description: Text placed after ``Instruct: `` on the first line.
        query: Text placed after ``Query: `` on the second line.

    Returns:
        The two labelled lines joined by a single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings so the matrix product below is a cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    # Rank documents for this query, highest score first
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda pair: pair[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Scraped example caption: "Each query must come with a one-sentence instruction that describes the task" (tags: python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last attended token of each sequence.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...].
        attention_mask: Mask indexed as [batch, position]; its row sums are
            used as token counts (assumes 1 = real token, 0 = padding).

    Returns:
        One hidden-state entry per batch row.
    """
    # Every row attends to its final position: the batch is left-padded (or
    # unpadded), so the final position already holds the last real token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token sits at (attended token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the instruction-prefixed query string.

    Args:
        task_description: Text placed after ``Instruct: `` on the first line.
        query: Text placed after ``Query: `` on the second line.

    Returns:
        The two labelled lines joined by a single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings so the matrix product below is a cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    # Rank documents for this query, highest score first
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda pair: pair[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Scraped example caption: "Each query must come with a one-sentence instruction that describes the task" (tags: python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last attended token of each sequence.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...].
        attention_mask: Mask indexed as [batch, position]; its row sums are
            used as token counts (assumes 1 = real token, 0 = padding).

    Returns:
        One hidden-state entry per batch row.
    """
    # Every row attends to its final position: the batch is left-padded (or
    # unpadded), so the final position already holds the last real token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token sits at (attended token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the instruction-prefixed query string.

    Args:
        task_description: Text placed after ``Instruct: `` on the first line.
        query: Text placed after ``Query: `` on the second line.

    Returns:
        The two labelled lines joined by a single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings so the matrix product below is a cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    # Rank documents for this query, highest score first
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda pair: pair[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Scraped example caption: "Each query must come with a one-sentence instruction that describes the task" (tags: python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last attended token of each sequence.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...].
        attention_mask: Mask indexed as [batch, position]; its row sums are
            used as token counts (assumes 1 = real token, 0 = padding).

    Returns:
        One hidden-state entry per batch row.
    """
    # Every row attends to its final position: the batch is left-padded (or
    # unpadded), so the final position already holds the last real token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token sits at (attended token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the instruction-prefixed query string.

    Args:
        task_description: Text placed after ``Instruct: `` on the first line.
        query: Text placed after ``Query: `` on the second line.

    Returns:
        The two labelled lines joined by a single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings so the matrix product below is a cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    # Rank documents for this query, highest score first
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda pair: pair[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Scraped example caption: "Each query must come with a one-sentence instruction that describes the task" (tags: python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last attended token of each sequence.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...].
        attention_mask: Mask indexed as [batch, position]; its row sums are
            used as token counts (assumes 1 = real token, 0 = padding).

    Returns:
        One hidden-state entry per batch row.
    """
    # Every row attends to its final position: the batch is left-padded (or
    # unpadded), so the final position already holds the last real token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token sits at (attended token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the instruction-prefixed query string.

    Args:
        task_description: Text placed after ``Instruct: `` on the first line.
        query: Text placed after ``Query: `` on the second line.

    Returns:
        The two labelled lines joined by a single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings so the matrix product below is a cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    # Rank documents for this query, highest score first
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda pair: pair[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Scraped example caption: "Each query must come with a one-sentence instruction that describes the task" (tags: python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last attended token of each sequence.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...].
        attention_mask: Mask indexed as [batch, position]; its row sums are
            used as token counts (assumes 1 = real token, 0 = padding).

    Returns:
        One hidden-state entry per batch row.
    """
    # Every row attends to its final position: the batch is left-padded (or
    # unpadded), so the final position already holds the last real token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token sits at (attended token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the instruction-prefixed query string.

    Args:
        task_description: Text placed after ``Instruct: `` on the first line.
        query: Text placed after ``Query: `` on the second line.

    Returns:
        The two labelled lines joined by a single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings so the matrix product below is a cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    # Rank documents for this query, highest score first
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda pair: pair[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Scraped example caption: "Each query must come with a one-sentence instruction that describes the task" (tags: python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last attended token of each sequence.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...].
        attention_mask: Mask indexed as [batch, position]; its row sums are
            used as token counts (assumes 1 = real token, 0 = padding).

    Returns:
        One hidden-state entry per batch row.
    """
    # Every row attends to its final position: the batch is left-padded (or
    # unpadded), so the final position already holds the last real token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token sits at (attended token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction.

    The result is two lines: ``Instruct: <task_description>`` followed by
    ``Query: <query>``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only the queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are L2-normalized, so this matrix product yields cosine similarities.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# [page residue, heading/tags of the next snippet] Each query must come with a one-sentence instruction that describes the task | python | transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last attended token of each sequence.

    Args:
        last_hidden_states: Model output indexed as (batch, position, ...);
            presumably (batch, seq_len, hidden) -- confirm against the model.
        attention_mask: (batch, seq_len) mask whose entries are summed, so
            real tokens are expected to be 1 and padding 0.

    Returns:
        One entry per batch row, taken at that row's last attended position.
    """
    # If every row attends to the final position, the batch is left-padded
    # (or unpadded) and the last column already holds the last real token.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token of a row sits at index (mask sum - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction.

    The result is two lines: ``Instruct: <task_description>`` followed by
    ``Query: <query>``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only the queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are L2-normalized, so this matrix product yields cosine similarities.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# [page residue, heading/tags of the next snippet] Each query must come with a one-sentence instruction that describes the task | python | transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last attended token of each sequence.

    Args:
        last_hidden_states: Model output indexed as (batch, position, ...);
            presumably (batch, seq_len, hidden) -- confirm against the model.
        attention_mask: (batch, seq_len) mask whose entries are summed, so
            real tokens are expected to be 1 and padding 0.

    Returns:
        One entry per batch row, taken at that row's last attended position.
    """
    # If every row attends to the final position, the batch is left-padded
    # (or unpadded) and the last column already holds the last real token.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token of a row sits at index (mask sum - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction.

    The result is two lines: ``Instruct: <task_description>`` followed by
    ``Query: <query>``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only the queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are L2-normalized, so this matrix product yields cosine similarities.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# [page residue, heading/tags of the next snippet] Each query must come with a one-sentence instruction that describes the task | python | transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last attended token of each sequence.

    Args:
        last_hidden_states: Model output indexed as (batch, position, ...);
            presumably (batch, seq_len, hidden) -- confirm against the model.
        attention_mask: (batch, seq_len) mask whose entries are summed, so
            real tokens are expected to be 1 and padding 0.

    Returns:
        One entry per batch row, taken at that row's last attended position.
    """
    # If every row attends to the final position, the batch is left-padded
    # (or unpadded) and the last column already holds the last real token.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token of a row sits at index (mask sum - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction.

    The result is two lines: ``Instruct: <task_description>`` followed by
    ``Query: <query>``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only the queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are L2-normalized, so this matrix product yields cosine similarities.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# [page residue, heading/tags of the next snippet] Each query must come with a one-sentence instruction that describes the task | python | transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last attended token of each sequence.

    Args:
        last_hidden_states: Model output indexed as (batch, position, ...);
            presumably (batch, seq_len, hidden) -- confirm against the model.
        attention_mask: (batch, seq_len) mask whose entries are summed, so
            real tokens are expected to be 1 and padding 0.

    Returns:
        One entry per batch row, taken at that row's last attended position.
    """
    # If every row attends to the final position, the batch is left-padded
    # (or unpadded) and the last column already holds the last real token.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token of a row sits at index (mask sum - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction.

    The result is two lines: ``Instruct: <task_description>`` followed by
    ``Query: <query>``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only the queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are L2-normalized, so this matrix product yields cosine similarities.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# [page residue, heading/tags of the next snippet] Each query must come with a one-sentence instruction that describes the task | python | transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last attended token of each sequence.

    Args:
        last_hidden_states: Model output indexed as (batch, position, ...);
            presumably (batch, seq_len, hidden) -- confirm against the model.
        attention_mask: (batch, seq_len) mask whose entries are summed, so
            real tokens are expected to be 1 and padding 0.

    Returns:
        One entry per batch row, taken at that row's last attended position.
    """
    # If every row attends to the final position, the batch is left-padded
    # (or unpadded) and the last column already holds the last real token.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token of a row sits at index (mask sum - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction.

    The result is two lines: ``Instruct: <task_description>`` followed by
    ``Query: <query>``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only the queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are L2-normalized, so this matrix product yields cosine similarities.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# [page residue, heading/tags of the next snippet] Each query must come with a one-sentence instruction that describes the task | python | transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last attended token of each sequence.

    Args:
        last_hidden_states: Model output indexed as (batch, position, ...);
            presumably (batch, seq_len, hidden) -- confirm against the model.
        attention_mask: (batch, seq_len) mask whose entries are summed, so
            real tokens are expected to be 1 and padding 0.

    Returns:
        One entry per batch row, taken at that row's last attended position.
    """
    # If every row attends to the final position, the batch is left-padded
    # (or unpadded) and the last column already holds the last real token.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token of a row sits at index (mask sum - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction.

    The result is two lines: ``Instruct: <task_description>`` followed by
    ``Query: <query>``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only the queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are L2-normalized, so this matrix product yields cosine similarities.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# [page residue, heading/tags of the next snippet] Each query must come with a one-sentence instruction that describes the task | python | transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last attended token of each sequence.

    Args:
        last_hidden_states: Model output indexed as (batch, position, ...);
            presumably (batch, seq_len, hidden) -- confirm against the model.
        attention_mask: (batch, seq_len) mask whose entries are summed, so
            real tokens are expected to be 1 and padding 0.

    Returns:
        One entry per batch row, taken at that row's last attended position.
    """
    # If every row attends to the final position, the batch is left-padded
    # (or unpadded) and the last column already holds the last real token.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token of a row sits at index (mask sum - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction.

    The result is two lines: ``Instruct: <task_description>`` followed by
    ``Query: <query>``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only the queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are L2-normalized, so this matrix product yields cosine similarities.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# [page residue, heading/tags of the next snippet] Each query must come with a one-sentence instruction that describes the task | python | transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last attended token of each sequence.

    Args:
        last_hidden_states: Model output indexed as (batch, position, ...);
            presumably (batch, seq_len, hidden) -- confirm against the model.
        attention_mask: (batch, seq_len) mask whose entries are summed, so
            real tokens are expected to be 1 and padding 0.

    Returns:
        One entry per batch row, taken at that row's last attended position.
    """
    # If every row attends to the final position, the batch is left-padded
    # (or unpadded) and the last column already holds the last real token.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token of a row sits at index (mask sum - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction.

    The result is two lines: ``Instruct: <task_description>`` followed by
    ``Query: <query>``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only the queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are L2-normalized, so this matrix product yields cosine similarities.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# [page residue, heading/tags of the next snippet] Each query must come with a one-sentence instruction that describes the task | python | transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last attended token of each sequence.

    Args:
        last_hidden_states: Model output indexed as (batch, position, ...);
            presumably (batch, seq_len, hidden) -- confirm against the model.
        attention_mask: (batch, seq_len) mask whose entries are summed, so
            real tokens are expected to be 1 and padding 0.

    Returns:
        One entry per batch row, taken at that row's last attended position.
    """
    # If every row attends to the final position, the batch is left-padded
    # (or unpadded) and the last column already holds the last real token.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token of a row sits at index (mask sum - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction.

    The result is two lines: ``Instruct: <task_description>`` followed by
    ``Query: <query>``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only the queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are L2-normalized, so this matrix product yields cosine similarities.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# [page residue, heading/tags of the next snippet] Each query must come with a one-sentence instruction that describes the task | python | transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last attended token of each sequence.

    Args:
        last_hidden_states: Model output indexed as (batch, position, ...);
            presumably (batch, seq_len, hidden) -- confirm against the model.
        attention_mask: (batch, seq_len) mask whose entries are summed, so
            real tokens are expected to be 1 and padding 0.

    Returns:
        One entry per batch row, taken at that row's last attended position.
    """
    # If every row attends to the final position, the batch is left-padded
    # (or unpadded) and the last column already holds the last real token.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token of a row sits at index (mask sum - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction.

    The result is two lines: ``Instruct: <task_description>`` followed by
    ``Query: <query>``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only the queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are L2-normalized, so this matrix product yields cosine similarities.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# [page residue, heading/tags of the next snippet] Each query must come with a one-sentence instruction that describes the task | python | transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last attended token of each sequence.

    Args:
        last_hidden_states: Model output indexed as (batch, position, ...);
            presumably (batch, seq_len, hidden) -- confirm against the model.
        attention_mask: (batch, seq_len) mask whose entries are summed, so
            real tokens are expected to be 1 and padding 0.

    Returns:
        One entry per batch row, taken at that row's last attended position.
    """
    # If every row attends to the final position, the batch is left-padded
    # (or unpadded) and the last column already holds the last real token.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token of a row sits at index (mask sum - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction.

    The result is two lines: ``Instruct: <task_description>`` followed by
    ``Query: <query>``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only the queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are L2-normalized, so this matrix product yields cosine similarities.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# [page residue, heading/tags of the next snippet] Each query must come with a one-sentence instruction that describes the task | python | transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last attended token of each sequence.

    Args:
        last_hidden_states: Model output indexed as (batch, position, ...);
            presumably (batch, seq_len, hidden) -- confirm against the model.
        attention_mask: (batch, seq_len) mask whose entries are summed, so
            real tokens are expected to be 1 and padding 0.

    Returns:
        One entry per batch row, taken at that row's last attended position.
    """
    # If every row attends to the final position, the batch is left-padded
    # (or unpadded) and the last column already holds the last real token.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token of a row sits at index (mask sum - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction.

    The result is two lines: ``Instruct: <task_description>`` followed by
    ``Query: <query>``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only the queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are L2-normalized, so this matrix product yields cosine similarities.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# [page residue, heading/tags of the next snippet] Each query must come with a one-sentence instruction that describes the task | python | transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last attended token of each sequence.

    Args:
        last_hidden_states: Model output indexed as (batch, position, ...);
            presumably (batch, seq_len, hidden) -- confirm against the model.
        attention_mask: (batch, seq_len) mask whose entries are summed, so
            real tokens are expected to be 1 and padding 0.

    Returns:
        One entry per batch row, taken at that row's last attended position.
    """
    # If every row attends to the final position, the batch is left-padded
    # (or unpadded) and the last column already holds the last real token.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token of a row sits at index (mask sum - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the query prefixed with its task instruction.

    The result has the form ``'Instruct: <task_description>\\nQuery: <query>'``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE(review): trust_remote_code=True runs code shipped with the checkpoint;
# only use it with repositories you trust.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are queries, columns are documents.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Snippet: "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select one hidden-state vector per sequence at its last attended position.

    If the last mask column sums to the batch size, the batch is treated as
    left-padded and the final position is returned for every row. Otherwise
    each row is indexed at ``attention_mask.sum(dim=1) - 1``.

    Args:
        last_hidden_states: Indexed as ``[batch, position, ...]``.
        attention_mask: Indexed as ``[batch, position]``; it is summed to count
            attended tokens, so entries are presumably 0/1.

    Returns:
        One slice of ``last_hidden_states`` per batch row.
    """
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token sits at (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the query prefixed with its task instruction.

    The result has the form ``'Instruct: <task_description>\\nQuery: <query>'``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE(review): trust_remote_code=True runs code shipped with the checkpoint;
# only use it with repositories you trust.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are queries, columns are documents.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Snippet: "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select one hidden-state vector per sequence at its last attended position.

    If the last mask column sums to the batch size, the batch is treated as
    left-padded and the final position is returned for every row. Otherwise
    each row is indexed at ``attention_mask.sum(dim=1) - 1``.

    Args:
        last_hidden_states: Indexed as ``[batch, position, ...]``.
        attention_mask: Indexed as ``[batch, position]``; it is summed to count
            attended tokens, so entries are presumably 0/1.

    Returns:
        One slice of ``last_hidden_states`` per batch row.
    """
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token sits at (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the query prefixed with its task instruction.

    The result has the form ``'Instruct: <task_description>\\nQuery: <query>'``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE(review): trust_remote_code=True runs code shipped with the checkpoint;
# only use it with repositories you trust.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are queries, columns are documents.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Snippet: "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select one hidden-state vector per sequence at its last attended position.

    If the last mask column sums to the batch size, the batch is treated as
    left-padded and the final position is returned for every row. Otherwise
    each row is indexed at ``attention_mask.sum(dim=1) - 1``.

    Args:
        last_hidden_states: Indexed as ``[batch, position, ...]``.
        attention_mask: Indexed as ``[batch, position]``; it is summed to count
            attended tokens, so entries are presumably 0/1.

    Returns:
        One slice of ``last_hidden_states`` per batch row.
    """
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token sits at (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the query prefixed with its task instruction.

    The result has the form ``'Instruct: <task_description>\\nQuery: <query>'``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE(review): trust_remote_code=True runs code shipped with the checkpoint;
# only use it with repositories you trust.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are queries, columns are documents.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Snippet: "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select one hidden-state vector per sequence at its last attended position.

    If the last mask column sums to the batch size, the batch is treated as
    left-padded and the final position is returned for every row. Otherwise
    each row is indexed at ``attention_mask.sum(dim=1) - 1``.

    Args:
        last_hidden_states: Indexed as ``[batch, position, ...]``.
        attention_mask: Indexed as ``[batch, position]``; it is summed to count
            attended tokens, so entries are presumably 0/1.

    Returns:
        One slice of ``last_hidden_states`` per batch row.
    """
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token sits at (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the query prefixed with its task instruction.

    The result has the form ``'Instruct: <task_description>\\nQuery: <query>'``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE(review): trust_remote_code=True runs code shipped with the checkpoint;
# only use it with repositories you trust.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are queries, columns are documents.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Snippet: "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select one hidden-state vector per sequence at its last attended position.

    If the last mask column sums to the batch size, the batch is treated as
    left-padded and the final position is returned for every row. Otherwise
    each row is indexed at ``attention_mask.sum(dim=1) - 1``.

    Args:
        last_hidden_states: Indexed as ``[batch, position, ...]``.
        attention_mask: Indexed as ``[batch, position]``; it is summed to count
            attended tokens, so entries are presumably 0/1.

    Returns:
        One slice of ``last_hidden_states`` per batch row.
    """
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token sits at (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the query prefixed with its task instruction.

    The result has the form ``'Instruct: <task_description>\\nQuery: <query>'``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE(review): trust_remote_code=True runs code shipped with the checkpoint;
# only use it with repositories you trust.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are queries, columns are documents.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Snippet: "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select one hidden-state vector per sequence at its last attended position.

    If the last mask column sums to the batch size, the batch is treated as
    left-padded and the final position is returned for every row. Otherwise
    each row is indexed at ``attention_mask.sum(dim=1) - 1``.

    Args:
        last_hidden_states: Indexed as ``[batch, position, ...]``.
        attention_mask: Indexed as ``[batch, position]``; it is summed to count
            attended tokens, so entries are presumably 0/1.

    Returns:
        One slice of ``last_hidden_states`` per batch row.
    """
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token sits at (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the query prefixed with its task instruction.

    The result has the form ``'Instruct: <task_description>\\nQuery: <query>'``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE(review): trust_remote_code=True runs code shipped with the checkpoint;
# only use it with repositories you trust.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are queries, columns are documents.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Snippet: "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select one hidden-state vector per sequence at its last attended position.

    If the last mask column sums to the batch size, the batch is treated as
    left-padded and the final position is returned for every row. Otherwise
    each row is indexed at ``attention_mask.sum(dim=1) - 1``.

    Args:
        last_hidden_states: Indexed as ``[batch, position, ...]``.
        attention_mask: Indexed as ``[batch, position]``; it is summed to count
            attended tokens, so entries are presumably 0/1.

    Returns:
        One slice of ``last_hidden_states`` per batch row.
    """
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token sits at (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the query prefixed with its task instruction.

    The result has the form ``'Instruct: <task_description>\\nQuery: <query>'``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE(review): trust_remote_code=True runs code shipped with the checkpoint;
# only use it with repositories you trust.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are queries, columns are documents.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Snippet: "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select one hidden-state vector per sequence at its last attended position.

    If the last mask column sums to the batch size, the batch is treated as
    left-padded and the final position is returned for every row. Otherwise
    each row is indexed at ``attention_mask.sum(dim=1) - 1``.

    Args:
        last_hidden_states: Indexed as ``[batch, position, ...]``.
        attention_mask: Indexed as ``[batch, position]``; it is summed to count
            attended tokens, so entries are presumably 0/1.

    Returns:
        One slice of ``last_hidden_states`` per batch row.
    """
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token sits at (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the query prefixed with its task instruction.

    The result has the form ``'Instruct: <task_description>\\nQuery: <query>'``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE(review): trust_remote_code=True runs code shipped with the checkpoint;
# only use it with repositories you trust.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are queries, columns are documents.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Snippet: "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select one hidden-state vector per sequence at its last attended position.

    If the last mask column sums to the batch size, the batch is treated as
    left-padded and the final position is returned for every row. Otherwise
    each row is indexed at ``attention_mask.sum(dim=1) - 1``.

    Args:
        last_hidden_states: Indexed as ``[batch, position, ...]``.
        attention_mask: Indexed as ``[batch, position]``; it is summed to count
            attended tokens, so entries are presumably 0/1.

    Returns:
        One slice of ``last_hidden_states`` per batch row.
    """
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token sits at (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the query prefixed with its task instruction.

    The result has the form ``'Instruct: <task_description>\\nQuery: <query>'``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE(review): trust_remote_code=True runs code shipped with the checkpoint;
# only use it with repositories you trust.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are queries, columns are documents.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Snippet: "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select one hidden-state vector per sequence at its last attended position.

    If the last mask column sums to the batch size, the batch is treated as
    left-padded and the final position is returned for every row. Otherwise
    each row is indexed at ``attention_mask.sum(dim=1) - 1``.

    Args:
        last_hidden_states: Indexed as ``[batch, position, ...]``.
        attention_mask: Indexed as ``[batch, position]``; it is summed to count
            attended tokens, so entries are presumably 0/1.

    Returns:
        One slice of ``last_hidden_states`` per batch row.
    """
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token sits at (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the query prefixed with its task instruction.

    The result has the form ``'Instruct: <task_description>\\nQuery: <query>'``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE(review): trust_remote_code=True runs code shipped with the checkpoint;
# only use it with repositories you trust.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are queries, columns are documents.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Snippet: "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select one hidden-state vector per sequence at its last attended position.

    If the last mask column sums to the batch size, the batch is treated as
    left-padded and the final position is returned for every row. Otherwise
    each row is indexed at ``attention_mask.sum(dim=1) - 1``.

    Args:
        last_hidden_states: Indexed as ``[batch, position, ...]``.
        attention_mask: Indexed as ``[batch, position]``; it is summed to count
            attended tokens, so entries are presumably 0/1.

    Returns:
        One slice of ``last_hidden_states`` per batch row.
    """
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token sits at (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the query prefixed with its task instruction.

    The result has the form ``'Instruct: <task_description>\\nQuery: <query>'``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE(review): trust_remote_code=True runs code shipped with the checkpoint;
# only use it with repositories you trust.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are queries, columns are documents.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Snippet: "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select one hidden-state vector per sequence at its last attended position.

    If the last mask column sums to the batch size, the batch is treated as
    left-padded and the final position is returned for every row. Otherwise
    each row is indexed at ``attention_mask.sum(dim=1) - 1``.

    Args:
        last_hidden_states: Indexed as ``[batch, position, ...]``.
        attention_mask: Indexed as ``[batch, position]``; it is summed to count
            attended tokens, so entries are presumably 0/1.

    Returns:
        One slice of ``last_hidden_states`` per batch row.
    """
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token sits at (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the query prefixed with its task instruction.

    The result has the form ``'Instruct: <task_description>\\nQuery: <query>'``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE(review): trust_remote_code=True runs code shipped with the checkpoint;
# only use it with repositories you trust.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are queries, columns are documents.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Snippet: "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select one hidden-state vector per sequence at its last attended position.

    If the last mask column sums to the batch size, the batch is treated as
    left-padded and the final position is returned for every row. Otherwise
    each row is indexed at ``attention_mask.sum(dim=1) - 1``.

    Args:
        last_hidden_states: Indexed as ``[batch, position, ...]``.
        attention_mask: Indexed as ``[batch, position]``; it is summed to count
            attended tokens, so entries are presumably 0/1.

    Returns:
        One slice of ``last_hidden_states`` per batch row.
    """
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token sits at (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in the ``Instruct: ...\\nQuery: ...`` format."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])
# normalize embeddings so the matrix product below yields cosine similarities
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page caption: "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each sequence in the batch, the hidden state of its last attended token.

    Args:
        last_hidden_states: Hidden states indexed as ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``; assumed to hold
            1 for real tokens and 0 for padding.

    Returns:
        One hidden-state vector per batch row.
    """
    # If every row's final mask entry is set, the batch is treated as left-padded
    # and the last position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise the index of the last real token is (number of attended tokens - 1);
    # this assumes the attended tokens are contiguous from position 0.
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in the ``Instruct: ...\\nQuery: ...`` format."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])
# normalize embeddings so the matrix product below yields cosine similarities
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page caption: "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each sequence in the batch, the hidden state of its last attended token.

    Args:
        last_hidden_states: Hidden states indexed as ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``; assumed to hold
            1 for real tokens and 0 for padding.

    Returns:
        One hidden-state vector per batch row.
    """
    # If every row's final mask entry is set, the batch is treated as left-padded
    # and the last position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise the index of the last real token is (number of attended tokens - 1);
    # this assumes the attended tokens are contiguous from position 0.
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in the ``Instruct: ...\\nQuery: ...`` format."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])
# normalize embeddings so the matrix product below yields cosine similarities
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page caption: "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each sequence in the batch, the hidden state of its last attended token.

    Args:
        last_hidden_states: Hidden states indexed as ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``; assumed to hold
            1 for real tokens and 0 for padding.

    Returns:
        One hidden-state vector per batch row.
    """
    # If every row's final mask entry is set, the batch is treated as left-padded
    # and the last position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise the index of the last real token is (number of attended tokens - 1);
    # this assumes the attended tokens are contiguous from position 0.
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in the ``Instruct: ...\\nQuery: ...`` format."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])
# normalize embeddings so the matrix product below yields cosine similarities
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page caption: "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each sequence in the batch, the hidden state of its last attended token.

    Args:
        last_hidden_states: Hidden states indexed as ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``; assumed to hold
            1 for real tokens and 0 for padding.

    Returns:
        One hidden-state vector per batch row.
    """
    # If every row's final mask entry is set, the batch is treated as left-padded
    # and the last position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise the index of the last real token is (number of attended tokens - 1);
    # this assumes the attended tokens are contiguous from position 0.
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in the ``Instruct: ...\\nQuery: ...`` format."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])
# normalize embeddings so the matrix product below yields cosine similarities
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page caption: "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each sequence in the batch, the hidden state of its last attended token.

    Args:
        last_hidden_states: Hidden states indexed as ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``; assumed to hold
            1 for real tokens and 0 for padding.

    Returns:
        One hidden-state vector per batch row.
    """
    # If every row's final mask entry is set, the batch is treated as left-padded
    # and the last position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise the index of the last real token is (number of attended tokens - 1);
    # this assumes the attended tokens are contiguous from position 0.
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in the ``Instruct: ...\\nQuery: ...`` format."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])
# normalize embeddings so the matrix product below yields cosine similarities
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page caption: "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each sequence in the batch, the hidden state of its last attended token.

    Args:
        last_hidden_states: Hidden states indexed as ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``; assumed to hold
            1 for real tokens and 0 for padding.

    Returns:
        One hidden-state vector per batch row.
    """
    # If every row's final mask entry is set, the batch is treated as left-padded
    # and the last position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise the index of the last real token is (number of attended tokens - 1);
    # this assumes the attended tokens are contiguous from position 0.
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in the ``Instruct: ...\\nQuery: ...`` format."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])
# normalize embeddings so the matrix product below yields cosine similarities
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page caption: "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each sequence in the batch, the hidden state of its last attended token.

    Args:
        last_hidden_states: Hidden states indexed as ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``; assumed to hold
            1 for real tokens and 0 for padding.

    Returns:
        One hidden-state vector per batch row.
    """
    # If every row's final mask entry is set, the batch is treated as left-padded
    # and the last position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise the index of the last real token is (number of attended tokens - 1);
    # this assumes the attended tokens are contiguous from position 0.
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in the ``Instruct: ...\\nQuery: ...`` format."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])
# normalize embeddings so the matrix product below yields cosine similarities
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page caption: "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each sequence in the batch, the hidden state of its last attended token.

    Args:
        last_hidden_states: Hidden states indexed as ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``; assumed to hold
            1 for real tokens and 0 for padding.

    Returns:
        One hidden-state vector per batch row.
    """
    # If every row's final mask entry is set, the batch is treated as left-padded
    # and the last position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise the index of the last real token is (number of attended tokens - 1);
    # this assumes the attended tokens are contiguous from position 0.
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in the ``Instruct: ...\\nQuery: ...`` format."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])
# normalize embeddings so the matrix product below yields cosine similarities
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page caption: "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each sequence in the batch, the hidden state of its last attended token.

    Args:
        last_hidden_states: Hidden states indexed as ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``; assumed to hold
            1 for real tokens and 0 for padding.

    Returns:
        One hidden-state vector per batch row.
    """
    # If every row's final mask entry is set, the batch is treated as left-padded
    # and the last position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise the index of the last real token is (number of attended tokens - 1);
    # this assumes the attended tokens are contiguous from position 0.
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in the ``Instruct: ...\\nQuery: ...`` format."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])
# normalize embeddings so the matrix product below yields cosine similarities
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page caption: "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each sequence in the batch, the hidden state of its last attended token.

    Args:
        last_hidden_states: Hidden states indexed as ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``; assumed to hold
            1 for real tokens and 0 for padding.

    Returns:
        One hidden-state vector per batch row.
    """
    # If every row's final mask entry is set, the batch is treated as left-padded
    # and the last position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise the index of the last real token is (number of attended tokens - 1);
    # this assumes the attended tokens are contiguous from position 0.
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in the ``Instruct: ...\\nQuery: ...`` format."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])
# normalize embeddings so the matrix product below yields cosine similarities
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page caption: "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each sequence in the batch, the hidden state of its last attended token.

    Args:
        last_hidden_states: Hidden states indexed as ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``; assumed to hold
            1 for real tokens and 0 for padding.

    Returns:
        One hidden-state vector per batch row.
    """
    # If every row's final mask entry is set, the batch is treated as left-padded
    # and the last position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise the index of the last real token is (number of attended tokens - 1);
    # this assumes the attended tokens are contiguous from position 0.
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in the ``Instruct: ...\\nQuery: ...`` format."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])
# normalize embeddings so the matrix product below yields cosine similarities
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page caption: "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each sequence in the batch, the hidden state of its last attended token.

    Args:
        last_hidden_states: Hidden states indexed as ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``; assumed to hold
            1 for real tokens and 0 for padding.

    Returns:
        One hidden-state vector per batch row.
    """
    # If every row's final mask entry is set, the batch is treated as left-padded
    # and the last position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise the index of the last real token is (number of attended tokens - 1);
    # this assumes the attended tokens are contiguous from position 0.
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in the ``Instruct: ...\\nQuery: ...`` format."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])
# normalize embeddings so the matrix product below yields cosine similarities
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page caption: "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each sequence in the batch, the hidden state of its last attended token.

    Args:
        last_hidden_states: Hidden states indexed as ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``; assumed to hold
            1 for real tokens and 0 for padding.

    Returns:
        One hidden-state vector per batch row.
    """
    # If every row's final mask entry is set, the batch is treated as left-padded
    # and the last position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise the index of the last real token is (number of attended tokens - 1);
    # this assumes the attended tokens are contiguous from position 0.
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the query prefixed with its task instruction.

    The result is 'Instruct: <task_description>' and 'Query: <query>' joined by a newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True lets the model repository's own Python code run locally.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Queries get the task instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are queries, columns are documents.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Next example (python, transformers): Each query must come with a one-sentence instruction that describes the task
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pick, for each sequence in the batch, the hidden state of its last attended token.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...].
        attention_mask: 2-D mask indexed as [batch, position]; assumed to hold
            1 for real tokens and 0 for padding.

    Returns:
        One hidden-state row per batch element.
    """
    # When every row attends to the final position, the batch is treated as
    # left-padded and the final position is the last token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise index each row at (count of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the query prefixed with its task instruction.

    The result is 'Instruct: <task_description>' and 'Query: <query>' joined by a newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True lets the model repository's own Python code run locally.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Queries get the task instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are queries, columns are documents.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Next example (python, transformers): Each query must come with a one-sentence instruction that describes the task
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pick, for each sequence in the batch, the hidden state of its last attended token.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...].
        attention_mask: 2-D mask indexed as [batch, position]; assumed to hold
            1 for real tokens and 0 for padding.

    Returns:
        One hidden-state row per batch element.
    """
    # When every row attends to the final position, the batch is treated as
    # left-padded and the final position is the last token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise index each row at (count of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the query prefixed with its task instruction.

    The result is 'Instruct: <task_description>' and 'Query: <query>' joined by a newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True lets the model repository's own Python code run locally.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Queries get the task instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are queries, columns are documents.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Next example (python, transformers): Each query must come with a one-sentence instruction that describes the task
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pick, for each sequence in the batch, the hidden state of its last attended token.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...].
        attention_mask: 2-D mask indexed as [batch, position]; assumed to hold
            1 for real tokens and 0 for padding.

    Returns:
        One hidden-state row per batch element.
    """
    # When every row attends to the final position, the batch is treated as
    # left-padded and the final position is the last token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise index each row at (count of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the query prefixed with its task instruction.

    The result is 'Instruct: <task_description>' and 'Query: <query>' joined by a newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True lets the model repository's own Python code run locally.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Queries get the task instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are queries, columns are documents.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Next example (python, transformers): Each query must come with a one-sentence instruction that describes the task
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pick, for each sequence in the batch, the hidden state of its last attended token.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...].
        attention_mask: 2-D mask indexed as [batch, position]; assumed to hold
            1 for real tokens and 0 for padding.

    Returns:
        One hidden-state row per batch element.
    """
    # When every row attends to the final position, the batch is treated as
    # left-padded and the final position is the last token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise index each row at (count of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the query prefixed with its task instruction.

    The result is 'Instruct: <task_description>' and 'Query: <query>' joined by a newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True lets the model repository's own Python code run locally.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Queries get the task instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are queries, columns are documents.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Next example (python, transformers): Each query must come with a one-sentence instruction that describes the task
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pick, for each sequence in the batch, the hidden state of its last attended token.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...].
        attention_mask: 2-D mask indexed as [batch, position]; assumed to hold
            1 for real tokens and 0 for padding.

    Returns:
        One hidden-state row per batch element.
    """
    # When every row attends to the final position, the batch is treated as
    # left-padded and the final position is the last token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise index each row at (count of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the query prefixed with its task instruction.

    The result is 'Instruct: <task_description>' and 'Query: <query>' joined by a newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True lets the model repository's own Python code run locally.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Queries get the task instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are queries, columns are documents.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Next example (python, transformers): Each query must come with a one-sentence instruction that describes the task
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pick, for each sequence in the batch, the hidden state of its last attended token.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...].
        attention_mask: 2-D mask indexed as [batch, position]; assumed to hold
            1 for real tokens and 0 for padding.

    Returns:
        One hidden-state row per batch element.
    """
    # When every row attends to the final position, the batch is treated as
    # left-padded and the final position is the last token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise index each row at (count of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the query prefixed with its task instruction.

    The result is 'Instruct: <task_description>' and 'Query: <query>' joined by a newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True lets the model repository's own Python code run locally.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Queries get the task instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are queries, columns are documents.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Next example (python, transformers): Each query must come with a one-sentence instruction that describes the task
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pick, for each sequence in the batch, the hidden state of its last attended token.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...].
        attention_mask: 2-D mask indexed as [batch, position]; assumed to hold
            1 for real tokens and 0 for padding.

    Returns:
        One hidden-state row per batch element.
    """
    # When every row attends to the final position, the batch is treated as
    # left-padded and the final position is the last token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise index each row at (count of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the query prefixed with its task instruction.

    The result is 'Instruct: <task_description>' and 'Query: <query>' joined by a newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True lets the model repository's own Python code run locally.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Queries get the task instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are queries, columns are documents.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Next example (python, transformers): Each query must come with a one-sentence instruction that describes the task
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pick, for each sequence in the batch, the hidden state of its last attended token.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...].
        attention_mask: 2-D mask indexed as [batch, position]; assumed to hold
            1 for real tokens and 0 for padding.

    Returns:
        One hidden-state row per batch element.
    """
    # When every row attends to the final position, the batch is treated as
    # left-padded and the final position is the last token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise index each row at (count of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the query prefixed with its task instruction.

    The result is 'Instruct: <task_description>' and 'Query: <query>' joined by a newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True lets the model repository's own Python code run locally.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Queries get the task instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are queries, columns are documents.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Next example (python, transformers): Each query must come with a one-sentence instruction that describes the task
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pick, for each sequence in the batch, the hidden state of its last attended token.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...].
        attention_mask: 2-D mask indexed as [batch, position]; assumed to hold
            1 for real tokens and 0 for padding.

    Returns:
        One hidden-state row per batch element.
    """
    # When every row attends to the final position, the batch is treated as
    # left-padded and the final position is the last token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise index each row at (count of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the query prefixed with its task instruction.

    The result is 'Instruct: <task_description>' and 'Query: <query>' joined by a newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True lets the model repository's own Python code run locally.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Queries get the task instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are queries, columns are documents.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Next example (python, transformers): Each query must come with a one-sentence instruction that describes the task
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pick, for each sequence in the batch, the hidden state of its last attended token.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...].
        attention_mask: 2-D mask indexed as [batch, position]; assumed to hold
            1 for real tokens and 0 for padding.

    Returns:
        One hidden-state row per batch element.
    """
    # When every row attends to the final position, the batch is treated as
    # left-padded and the final position is the last token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise index each row at (count of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the query prefixed with its task instruction.

    The result is 'Instruct: <task_description>' and 'Query: <query>' joined by a newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True lets the model repository's own Python code run locally.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Queries get the task instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are queries, columns are documents.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Next example (python, transformers): Each query must come with a one-sentence instruction that describes the task
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pick, for each sequence in the batch, the hidden state of its last attended token.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...].
        attention_mask: 2-D mask indexed as [batch, position]; assumed to hold
            1 for real tokens and 0 for padding.

    Returns:
        One hidden-state row per batch element.
    """
    # When every row attends to the final position, the batch is treated as
    # left-padded and the final position is the last token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise index each row at (count of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the query prefixed with its task instruction.

    The result is 'Instruct: <task_description>' and 'Query: <query>' joined by a newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True lets the model repository's own Python code run locally.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Queries get the task instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are queries, columns are documents.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Next example (python, transformers): Each query must come with a one-sentence instruction that describes the task
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pick, for each sequence in the batch, the hidden state of its last attended token.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...].
        attention_mask: 2-D mask indexed as [batch, position]; assumed to hold
            1 for real tokens and 0 for padding.

    Returns:
        One hidden-state row per batch element.
    """
    # When every row attends to the final position, the batch is treated as
    # left-padded and the final position is the last token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise index each row at (count of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the query prefixed with its task instruction.

    The result is 'Instruct: <task_description>' and 'Query: <query>' joined by a newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True lets the model repository's own Python code run locally.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Queries get the task instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are queries, columns are documents.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Next example (python, transformers): Each query must come with a one-sentence instruction that describes the task
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pick, for each sequence in the batch, the hidden state of its last attended token.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...].
        attention_mask: 2-D mask indexed as [batch, position]; assumed to hold
            1 for real tokens and 0 for padding.

    Returns:
        One hidden-state row per batch element.
    """
    # When every row attends to the final position, the batch is treated as
    # left-padded and the final position is the last token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise index each row at (count of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in the ``Instruct:/Query:`` layout."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

queries = ['Calculate the n-th factorial']
# Only the queries get the instruction prefix; documents are tokenized as-is.
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are unit-length after normalization, so this product is cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Next example: "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at each sequence's last attended position.

    Args:
        last_hidden_states: Per-token hidden states; assumed to be
            (batch, seq_len, hidden) -- TODO confirm against the model output.
        attention_mask: Mask aligned with the first two axes of
            ``last_hidden_states``; assumed 1 for real tokens, 0 for padding.

    Returns:
        One vector per batch row.
    """
    # If every row's final position is attended, the batch is treated as
    # left-padded, so the final position holds each row's last real token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise index each row at (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in the ``Instruct:/Query:`` layout."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

queries = ['Calculate the n-th factorial']
# Only the queries get the instruction prefix; documents are tokenized as-is.
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are unit-length after normalization, so this product is cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Next example: "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at each sequence's last attended position.

    Args:
        last_hidden_states: Per-token hidden states; assumed to be
            (batch, seq_len, hidden) -- TODO confirm against the model output.
        attention_mask: Mask aligned with the first two axes of
            ``last_hidden_states``; assumed 1 for real tokens, 0 for padding.

    Returns:
        One vector per batch row.
    """
    # If every row's final position is attended, the batch is treated as
    # left-padded, so the final position holds each row's last real token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise index each row at (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in the ``Instruct:/Query:`` layout."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

queries = ['Calculate the n-th factorial']
# Only the queries get the instruction prefix; documents are tokenized as-is.
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are unit-length after normalization, so this product is cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Next example: "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at each sequence's last attended position.

    Args:
        last_hidden_states: Per-token hidden states; assumed to be
            (batch, seq_len, hidden) -- TODO confirm against the model output.
        attention_mask: Mask aligned with the first two axes of
            ``last_hidden_states``; assumed 1 for real tokens, 0 for padding.

    Returns:
        One vector per batch row.
    """
    # If every row's final position is attended, the batch is treated as
    # left-padded, so the final position holds each row's last real token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise index each row at (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in the ``Instruct:/Query:`` layout."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

queries = ['Calculate the n-th factorial']
# Only the queries get the instruction prefix; documents are tokenized as-is.
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are unit-length after normalization, so this product is cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Next example: "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at each sequence's last attended position.

    Args:
        last_hidden_states: Per-token hidden states; assumed to be
            (batch, seq_len, hidden) -- TODO confirm against the model output.
        attention_mask: Mask aligned with the first two axes of
            ``last_hidden_states``; assumed 1 for real tokens, 0 for padding.

    Returns:
        One vector per batch row.
    """
    # If every row's final position is attended, the batch is treated as
    # left-padded, so the final position holds each row's last real token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise index each row at (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in the ``Instruct:/Query:`` layout."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

queries = ['Calculate the n-th factorial']
# Only the queries get the instruction prefix; documents are tokenized as-is.
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are unit-length after normalization, so this product is cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Next example: "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at each sequence's last attended position.

    Args:
        last_hidden_states: Per-token hidden states; assumed to be
            (batch, seq_len, hidden) -- TODO confirm against the model output.
        attention_mask: Mask aligned with the first two axes of
            ``last_hidden_states``; assumed 1 for real tokens, 0 for padding.

    Returns:
        One vector per batch row.
    """
    # If every row's final position is attended, the batch is treated as
    # left-padded, so the final position holds each row's last real token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise index each row at (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in the ``Instruct:/Query:`` layout."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

queries = ['Calculate the n-th factorial']
# Only the queries get the instruction prefix; documents are tokenized as-is.
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are unit-length after normalization, so this product is cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Next example: "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at each sequence's last attended position.

    Args:
        last_hidden_states: Per-token hidden states; assumed to be
            (batch, seq_len, hidden) -- TODO confirm against the model output.
        attention_mask: Mask aligned with the first two axes of
            ``last_hidden_states``; assumed 1 for real tokens, 0 for padding.

    Returns:
        One vector per batch row.
    """
    # If every row's final position is attended, the batch is treated as
    # left-padded, so the final position holds each row's last real token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise index each row at (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in the ``Instruct:/Query:`` layout."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

queries = ['Calculate the n-th factorial']
# Only the queries get the instruction prefix; documents are tokenized as-is.
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are unit-length after normalization, so this product is cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Next example: "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at each sequence's last attended position.

    Args:
        last_hidden_states: Per-token hidden states; assumed to be
            (batch, seq_len, hidden) -- TODO confirm against the model output.
        attention_mask: Mask aligned with the first two axes of
            ``last_hidden_states``; assumed 1 for real tokens, 0 for padding.

    Returns:
        One vector per batch row.
    """
    # If every row's final position is attended, the batch is treated as
    # left-padded, so the final position holds each row's last real token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise index each row at (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in the ``Instruct:/Query:`` layout."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

queries = ['Calculate the n-th factorial']
# Only the queries get the instruction prefix; documents are tokenized as-is.
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are unit-length after normalization, so this product is cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Next example: "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at each sequence's last attended position.

    Args:
        last_hidden_states: Per-token hidden states; assumed to be
            (batch, seq_len, hidden) -- TODO confirm against the model output.
        attention_mask: Mask aligned with the first two axes of
            ``last_hidden_states``; assumed 1 for real tokens, 0 for padding.

    Returns:
        One vector per batch row.
    """
    # If every row's final position is attended, the batch is treated as
    # left-padded, so the final position holds each row's last real token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise index each row at (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in the ``Instruct:/Query:`` layout."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

queries = ['Calculate the n-th factorial']
# Only the queries get the instruction prefix; documents are tokenized as-is.
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are unit-length after normalization, so this product is cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Next example: "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at each sequence's last attended position.

    Args:
        last_hidden_states: Per-token hidden states; assumed to be
            (batch, seq_len, hidden) -- TODO confirm against the model output.
        attention_mask: Mask aligned with the first two axes of
            ``last_hidden_states``; assumed 1 for real tokens, 0 for padding.

    Returns:
        One vector per batch row.
    """
    # If every row's final position is attended, the batch is treated as
    # left-padded, so the final position holds each row's last real token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise index each row at (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in the ``Instruct:/Query:`` layout."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

queries = ['Calculate the n-th factorial']
# Only the queries get the instruction prefix; documents are tokenized as-is.
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are unit-length after normalization, so this product is cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Next example: "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at each sequence's last attended position.

    Args:
        last_hidden_states: Per-token hidden states; assumed to be
            (batch, seq_len, hidden) -- TODO confirm against the model output.
        attention_mask: Mask aligned with the first two axes of
            ``last_hidden_states``; assumed 1 for real tokens, 0 for padding.

    Returns:
        One vector per batch row.
    """
    # If every row's final position is attended, the batch is treated as
    # left-padded, so the final position holds each row's last real token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise index each row at (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in the ``Instruct:/Query:`` layout."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

queries = ['Calculate the n-th factorial']
# Only the queries get the instruction prefix; documents are tokenized as-is.
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are unit-length after normalization, so this product is cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Next example: "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at each sequence's last attended position.

    Args:
        last_hidden_states: Per-token hidden states; assumed to be
            (batch, seq_len, hidden) -- TODO confirm against the model output.
        attention_mask: Mask aligned with the first two axes of
            ``last_hidden_states``; assumed 1 for real tokens, 0 for padding.

    Returns:
        One vector per batch row.
    """
    # If every row's final position is attended, the batch is treated as
    # left-padded, so the final position holds each row's last real token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise index each row at (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in the ``Instruct:/Query:`` layout."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

queries = ['Calculate the n-th factorial']
# Only the queries get the instruction prefix; documents are tokenized as-is.
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are unit-length after normalization, so this product is cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Next example: "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at each sequence's last attended position.

    Args:
        last_hidden_states: Per-token hidden states; assumed to be
            (batch, seq_len, hidden) -- TODO confirm against the model output.
        attention_mask: Mask aligned with the first two axes of
            ``last_hidden_states``; assumed 1 for real tokens, 0 for padding.

    Returns:
        One vector per batch row.
    """
    # If every row's final position is attended, the batch is treated as
    # left-padded, so the final position holds each row's last real token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise index each row at (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in the ``Instruct:/Query:`` layout."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

queries = ['Calculate the n-th factorial']
# Only the queries get the instruction prefix; documents are tokenized as-is.
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are unit-length after normalization, so this product is cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Next example: "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at each sequence's last attended position.

    Args:
        last_hidden_states: Per-token hidden states; assumed to be
            (batch, seq_len, hidden) -- TODO confirm against the model output.
        attention_mask: Mask aligned with the first two axes of
            ``last_hidden_states``; assumed 1 for real tokens, 0 for padding.

    Returns:
        One vector per batch row.
    """
    # If every row's final position is attended, the batch is treated as
    # left-padded, so the final position holds each row's last real token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise index each row at (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Format a query as 'Instruct: <task_description>', a newline, then 'Query: <query>'."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])
# normalize embeddings so the matrix product below is a cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each batch row, the hidden state of its last unmasked token.

    Args:
        last_hidden_states: Indexed here as [batch, position, ...]; the example
            passes ``model(**inputs).last_hidden_state``.
        attention_mask: Indexed here as [batch, position]; assumed to be 1 for
            real tokens and 0 for padding (tokenizer output).

    Returns:
        The state at the final position when the batch is left-padded,
        otherwise the state at index ``attention_mask.sum(dim=1) - 1`` per row.
    """
    # Every row ending in a 1 means any padding is on the left, so the final
    # position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        # Right padding: the last real token sits at (count of 1s) - 1.
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Format a query as 'Instruct: <task_description>', a newline, then 'Query: <query>'."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])
# normalize embeddings so the matrix product below is a cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each batch row, the hidden state of its last unmasked token.

    Args:
        last_hidden_states: Indexed here as [batch, position, ...]; the example
            passes ``model(**inputs).last_hidden_state``.
        attention_mask: Indexed here as [batch, position]; assumed to be 1 for
            real tokens and 0 for padding (tokenizer output).

    Returns:
        The state at the final position when the batch is left-padded,
        otherwise the state at index ``attention_mask.sum(dim=1) - 1`` per row.
    """
    # Every row ending in a 1 means any padding is on the left, so the final
    # position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        # Right padding: the last real token sits at (count of 1s) - 1.
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Format a query as 'Instruct: <task_description>', a newline, then 'Query: <query>'."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])
# normalize embeddings so the matrix product below is a cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each batch row, the hidden state of its last unmasked token.

    Args:
        last_hidden_states: Indexed here as [batch, position, ...]; the example
            passes ``model(**inputs).last_hidden_state``.
        attention_mask: Indexed here as [batch, position]; assumed to be 1 for
            real tokens and 0 for padding (tokenizer output).

    Returns:
        The state at the final position when the batch is left-padded,
        otherwise the state at index ``attention_mask.sum(dim=1) - 1`` per row.
    """
    # Every row ending in a 1 means any padding is on the left, so the final
    # position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        # Right padding: the last real token sits at (count of 1s) - 1.
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Format a query as 'Instruct: <task_description>', a newline, then 'Query: <query>'."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])
# normalize embeddings so the matrix product below is a cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each batch row, the hidden state of its last unmasked token.

    Args:
        last_hidden_states: Indexed here as [batch, position, ...]; the example
            passes ``model(**inputs).last_hidden_state``.
        attention_mask: Indexed here as [batch, position]; assumed to be 1 for
            real tokens and 0 for padding (tokenizer output).

    Returns:
        The state at the final position when the batch is left-padded,
        otherwise the state at index ``attention_mask.sum(dim=1) - 1`` per row.
    """
    # Every row ending in a 1 means any padding is on the left, so the final
    # position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        # Right padding: the last real token sits at (count of 1s) - 1.
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Format a query as 'Instruct: <task_description>', a newline, then 'Query: <query>'."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])
# normalize embeddings so the matrix product below is a cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each batch row, the hidden state of its last unmasked token.

    Args:
        last_hidden_states: Indexed here as [batch, position, ...]; the example
            passes ``model(**inputs).last_hidden_state``.
        attention_mask: Indexed here as [batch, position]; assumed to be 1 for
            real tokens and 0 for padding (tokenizer output).

    Returns:
        The state at the final position when the batch is left-padded,
        otherwise the state at index ``attention_mask.sum(dim=1) - 1`` per row.
    """
    # Every row ending in a 1 means any padding is on the left, so the final
    # position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        # Right padding: the last real token sits at (count of 1s) - 1.
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Format a query as 'Instruct: <task_description>', a newline, then 'Query: <query>'."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])
# normalize embeddings so the matrix product below is a cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each batch row, the hidden state of its last unmasked token.

    Args:
        last_hidden_states: Indexed here as [batch, position, ...]; the example
            passes ``model(**inputs).last_hidden_state``.
        attention_mask: Indexed here as [batch, position]; assumed to be 1 for
            real tokens and 0 for padding (tokenizer output).

    Returns:
        The state at the final position when the batch is left-padded,
        otherwise the state at index ``attention_mask.sum(dim=1) - 1`` per row.
    """
    # Every row ending in a 1 means any padding is on the left, so the final
    # position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        # Right padding: the last real token sits at (count of 1s) - 1.
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Format a query as 'Instruct: <task_description>', a newline, then 'Query: <query>'."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])
# normalize embeddings so the matrix product below is a cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each batch row, the hidden state of its last unmasked token.

    Args:
        last_hidden_states: Indexed here as [batch, position, ...]; the example
            passes ``model(**inputs).last_hidden_state``.
        attention_mask: Indexed here as [batch, position]; assumed to be 1 for
            real tokens and 0 for padding (tokenizer output).

    Returns:
        The state at the final position when the batch is left-padded,
        otherwise the state at index ``attention_mask.sum(dim=1) - 1`` per row.
    """
    # Every row ending in a 1 means any padding is on the left, so the final
    # position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        # Right padding: the last real token sits at (count of 1s) - 1.
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Format a query as 'Instruct: <task_description>', a newline, then 'Query: <query>'."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])
# normalize embeddings so the matrix product below is a cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each batch row, the hidden state of its last unmasked token.

    Args:
        last_hidden_states: Indexed here as [batch, position, ...]; the example
            passes ``model(**inputs).last_hidden_state``.
        attention_mask: Indexed here as [batch, position]; assumed to be 1 for
            real tokens and 0 for padding (tokenizer output).

    Returns:
        The state at the final position when the batch is left-padded,
        otherwise the state at index ``attention_mask.sum(dim=1) - 1`` per row.
    """
    # Every row ending in a 1 means any padding is on the left, so the final
    # position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        # Right padding: the last real token sits at (count of 1s) - 1.
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Format a query as 'Instruct: <task_description>', a newline, then 'Query: <query>'."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])
# normalize embeddings so the matrix product below is a cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each batch row, the hidden state of its last unmasked token.

    Args:
        last_hidden_states: Indexed here as [batch, position, ...]; the example
            passes ``model(**inputs).last_hidden_state``.
        attention_mask: Indexed here as [batch, position]; assumed to be 1 for
            real tokens and 0 for padding (tokenizer output).

    Returns:
        The state at the final position when the batch is left-padded,
        otherwise the state at index ``attention_mask.sum(dim=1) - 1`` per row.
    """
    # Every row ending in a 1 means any padding is on the left, so the final
    # position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        # Right padding: the last real token sits at (count of 1s) - 1.
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Format a query as 'Instruct: <task_description>', a newline, then 'Query: <query>'."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])
# normalize embeddings so the matrix product below is a cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each batch row, the hidden state of its last unmasked token.

    Args:
        last_hidden_states: Indexed here as [batch, position, ...]; the example
            passes ``model(**inputs).last_hidden_state``.
        attention_mask: Indexed here as [batch, position]; assumed to be 1 for
            real tokens and 0 for padding (tokenizer output).

    Returns:
        The state at the final position when the batch is left-padded,
        otherwise the state at index ``attention_mask.sum(dim=1) - 1`` per row.
    """
    # Every row ending in a 1 means any padding is on the left, so the final
    # position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        # Right padding: the last real token sits at (count of 1s) - 1.
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Format a query as 'Instruct: <task_description>', a newline, then 'Query: <query>'."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])
# normalize embeddings so the matrix product below is a cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each batch row, the hidden state of its last unmasked token.

    Args:
        last_hidden_states: Indexed here as [batch, position, ...]; the example
            passes ``model(**inputs).last_hidden_state``.
        attention_mask: Indexed here as [batch, position]; assumed to be 1 for
            real tokens and 0 for padding (tokenizer output).

    Returns:
        The state at the final position when the batch is left-padded,
        otherwise the state at index ``attention_mask.sum(dim=1) - 1`` per row.
    """
    # Every row ending in a 1 means any padding is on the left, so the final
    # position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        # Right padding: the last real token sits at (count of 1s) - 1.
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Format a query as 'Instruct: <task_description>', a newline, then 'Query: <query>'."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])
# normalize embeddings so the matrix product below is a cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each batch row, the hidden state of its last unmasked token.

    Args:
        last_hidden_states: Indexed here as [batch, position, ...]; the example
            passes ``model(**inputs).last_hidden_state``.
        attention_mask: Indexed here as [batch, position]; assumed to be 1 for
            real tokens and 0 for padding (tokenizer output).

    Returns:
        The state at the final position when the batch is left-padded,
        otherwise the state at index ``attention_mask.sum(dim=1) - 1`` per row.
    """
    # Every row ending in a 1 means any padding is on the left, so the final
    # position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        # Right padding: the last real token sits at (count of 1s) - 1.
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Format a query as 'Instruct: <task_description>', a newline, then 'Query: <query>'."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])
# normalize embeddings so the matrix product below is a cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each batch row, the hidden state of its last unmasked token.

    Args:
        last_hidden_states: Indexed here as [batch, position, ...]; the example
            passes ``model(**inputs).last_hidden_state``.
        attention_mask: Indexed here as [batch, position]; assumed to be 1 for
            real tokens and 0 for padding (tokenizer output).

    Returns:
        The state at the final position when the batch is left-padded,
        otherwise the state at index ``attention_mask.sum(dim=1) - 1`` per row.
    """
    # Every row ending in a 1 means any padding is on the left, so the final
    # position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        # Right padding: the last real token sits at (count of 1s) - 1.
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the query prefixed with its task instruction.

    The result is an ``Instruct:`` line holding *task_description*, a newline,
    then a ``Query:`` line holding *query*.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Example: rank code documents against a query with the raw transformers API
# (tokenize -> encode -> last-token pool -> L2-normalize -> similarity).

# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True allows custom code from the model repository to run.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

# Inputs longer than this many tokens are truncated by the tokenizer calls below.
max_length = 8192

# Only queries receive the instruction prefix; documents are encoded as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings (L2), so the matrix product below is cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# scores[i][j] is the similarity between query i and document j
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    # Highest-scoring document first
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# --- Next example (python / transformers) ---
# Each query must come with a one-sentence instruction that describes the task
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pool each sequence down to the hidden state of its last attended token.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            (batch, position, ...).
        attention_mask: Mask indexed as (batch, position); it is summed per
            row to count attended tokens, so it is expected to hold 1 for
            real tokens and 0 for padding.

    Returns:
        One vector per batch row, taken at that row's last attended position.
    """
    # If every row attends at the final position, the batch is left-padded
    # (or unpadded) and the last position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        # Right padding: index of the last real token is (attended count - 1).
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the query prefixed with its task instruction.

    The result is an ``Instruct:`` line holding *task_description*, a newline,
    then a ``Query:`` line holding *query*.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Example: rank code documents against a query with the raw transformers API
# (tokenize -> encode -> last-token pool -> L2-normalize -> similarity).

# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True allows custom code from the model repository to run.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

# Inputs longer than this many tokens are truncated by the tokenizer calls below.
max_length = 8192

# Only queries receive the instruction prefix; documents are encoded as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings (L2), so the matrix product below is cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# scores[i][j] is the similarity between query i and document j
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    # Highest-scoring document first
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# --- Next example (python / transformers) ---
# Each query must come with a one-sentence instruction that describes the task
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pool each sequence down to the hidden state of its last attended token.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            (batch, position, ...).
        attention_mask: Mask indexed as (batch, position); it is summed per
            row to count attended tokens, so it is expected to hold 1 for
            real tokens and 0 for padding.

    Returns:
        One vector per batch row, taken at that row's last attended position.
    """
    # If every row attends at the final position, the batch is left-padded
    # (or unpadded) and the last position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        # Right padding: index of the last real token is (attended count - 1).
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the query prefixed with its task instruction.

    The result is an ``Instruct:`` line holding *task_description*, a newline,
    then a ``Query:`` line holding *query*.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Example: rank code documents against a query with the raw transformers API
# (tokenize -> encode -> last-token pool -> L2-normalize -> similarity).

# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True allows custom code from the model repository to run.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

# Inputs longer than this many tokens are truncated by the tokenizer calls below.
max_length = 8192

# Only queries receive the instruction prefix; documents are encoded as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings (L2), so the matrix product below is cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# scores[i][j] is the similarity between query i and document j
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    # Highest-scoring document first
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# --- Next example (python / transformers) ---
# Each query must come with a one-sentence instruction that describes the task
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pool each sequence down to the hidden state of its last attended token.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            (batch, position, ...).
        attention_mask: Mask indexed as (batch, position); it is summed per
            row to count attended tokens, so it is expected to hold 1 for
            real tokens and 0 for padding.

    Returns:
        One vector per batch row, taken at that row's last attended position.
    """
    # If every row attends at the final position, the batch is left-padded
    # (or unpadded) and the last position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        # Right padding: index of the last real token is (attended count - 1).
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the query prefixed with its task instruction.

    The result is an ``Instruct:`` line holding *task_description*, a newline,
    then a ``Query:`` line holding *query*.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Example: rank code documents against a query with the raw transformers API
# (tokenize -> encode -> last-token pool -> L2-normalize -> similarity).

# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True allows custom code from the model repository to run.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

# Inputs longer than this many tokens are truncated by the tokenizer calls below.
max_length = 8192

# Only queries receive the instruction prefix; documents are encoded as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings (L2), so the matrix product below is cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# scores[i][j] is the similarity between query i and document j
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    # Highest-scoring document first
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# --- Next example (python / transformers) ---
# Each query must come with a one-sentence instruction that describes the task
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pool each sequence down to the hidden state of its last attended token.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            (batch, position, ...).
        attention_mask: Mask indexed as (batch, position); it is summed per
            row to count attended tokens, so it is expected to hold 1 for
            real tokens and 0 for padding.

    Returns:
        One vector per batch row, taken at that row's last attended position.
    """
    # If every row attends at the final position, the batch is left-padded
    # (or unpadded) and the last position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        # Right padding: index of the last real token is (attended count - 1).
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the query prefixed with its task instruction.

    The result is an ``Instruct:`` line holding *task_description*, a newline,
    then a ``Query:`` line holding *query*.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Example: rank code documents against a query with the raw transformers API
# (tokenize -> encode -> last-token pool -> L2-normalize -> similarity).

# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True allows custom code from the model repository to run.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

# Inputs longer than this many tokens are truncated by the tokenizer calls below.
max_length = 8192

# Only queries receive the instruction prefix; documents are encoded as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings (L2), so the matrix product below is cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# scores[i][j] is the similarity between query i and document j
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    # Highest-scoring document first
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# --- Next example (python / transformers) ---
# Each query must come with a one-sentence instruction that describes the task
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pool each sequence down to the hidden state of its last attended token.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            (batch, position, ...).
        attention_mask: Mask indexed as (batch, position); it is summed per
            row to count attended tokens, so it is expected to hold 1 for
            real tokens and 0 for padding.

    Returns:
        One vector per batch row, taken at that row's last attended position.
    """
    # If every row attends at the final position, the batch is left-padded
    # (or unpadded) and the last position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        # Right padding: index of the last real token is (attended count - 1).
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the query prefixed with its task instruction.

    The result is an ``Instruct:`` line holding *task_description*, a newline,
    then a ``Query:`` line holding *query*.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Example: rank code documents against a query with the raw transformers API
# (tokenize -> encode -> last-token pool -> L2-normalize -> similarity).

# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True allows custom code from the model repository to run.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

# Inputs longer than this many tokens are truncated by the tokenizer calls below.
max_length = 8192

# Only queries receive the instruction prefix; documents are encoded as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings (L2), so the matrix product below is cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# scores[i][j] is the similarity between query i and document j
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    # Highest-scoring document first
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# --- Next example (python / transformers) ---
# Each query must come with a one-sentence instruction that describes the task
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pool each sequence down to the hidden state of its last attended token.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            (batch, position, ...).
        attention_mask: Mask indexed as (batch, position); it is summed per
            row to count attended tokens, so it is expected to hold 1 for
            real tokens and 0 for padding.

    Returns:
        One vector per batch row, taken at that row's last attended position.
    """
    # If every row attends at the final position, the batch is left-padded
    # (or unpadded) and the last position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        # Right padding: index of the last real token is (attended count - 1).
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the query prefixed with its task instruction.

    The result is an ``Instruct:`` line holding *task_description*, a newline,
    then a ``Query:`` line holding *query*.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Example: rank code documents against a query with the raw transformers API
# (tokenize -> encode -> last-token pool -> L2-normalize -> similarity).

# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True allows custom code from the model repository to run.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

# Inputs longer than this many tokens are truncated by the tokenizer calls below.
max_length = 8192

# Only queries receive the instruction prefix; documents are encoded as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings (L2), so the matrix product below is cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# scores[i][j] is the similarity between query i and document j
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    # Highest-scoring document first
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# --- Next example (python / transformers) ---
# Each query must come with a one-sentence instruction that describes the task
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pool each sequence down to the hidden state of its last attended token.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            (batch, position, ...).
        attention_mask: Mask indexed as (batch, position); it is summed per
            row to count attended tokens, so it is expected to hold 1 for
            real tokens and 0 for padding.

    Returns:
        One vector per batch row, taken at that row's last attended position.
    """
    # If every row attends at the final position, the batch is left-padded
    # (or unpadded) and the last position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        # Right padding: index of the last real token is (attended count - 1).
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the query prefixed with its task instruction.

    The result is an ``Instruct:`` line holding *task_description*, a newline,
    then a ``Query:`` line holding *query*.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Example: rank code documents against a query with the raw transformers API
# (tokenize -> encode -> last-token pool -> L2-normalize -> similarity).

# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True allows custom code from the model repository to run.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

# Inputs longer than this many tokens are truncated by the tokenizer calls below.
max_length = 8192

# Only queries receive the instruction prefix; documents are encoded as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings (L2), so the matrix product below is cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# scores[i][j] is the similarity between query i and document j
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    # Highest-scoring document first
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# --- Next example (python / transformers) ---
# Each query must come with a one-sentence instruction that describes the task
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pool each sequence down to the hidden state of its last attended token.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            (batch, position, ...).
        attention_mask: Mask indexed as (batch, position); it is summed per
            row to count attended tokens, so it is expected to hold 1 for
            real tokens and 0 for padding.

    Returns:
        One vector per batch row, taken at that row's last attended position.
    """
    # If every row attends at the final position, the batch is left-padded
    # (or unpadded) and the last position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        # Right padding: index of the last real token is (attended count - 1).
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the query prefixed with its task instruction.

    The result is an ``Instruct:`` line holding *task_description*, a newline,
    then a ``Query:`` line holding *query*.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Example: rank code documents against a query with the raw transformers API
# (tokenize -> encode -> last-token pool -> L2-normalize -> similarity).

# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True allows custom code from the model repository to run.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

# Inputs longer than this many tokens are truncated by the tokenizer calls below.
max_length = 8192

# Only queries receive the instruction prefix; documents are encoded as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings (L2), so the matrix product below is cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# scores[i][j] is the similarity between query i and document j
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    # Highest-scoring document first
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# --- Next example (python / transformers) ---
# Each query must come with a one-sentence instruction that describes the task
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pool each sequence down to the hidden state of its last attended token.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            (batch, position, ...).
        attention_mask: Mask indexed as (batch, position); it is summed per
            row to count attended tokens, so it is expected to hold 1 for
            real tokens and 0 for padding.

    Returns:
        One vector per batch row, taken at that row's last attended position.
    """
    # If every row attends at the final position, the batch is left-padded
    # (or unpadded) and the last position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        # Right padding: index of the last real token is (attended count - 1).
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the query prefixed with its task instruction.

    The result is an ``Instruct:`` line holding *task_description*, a newline,
    then a ``Query:`` line holding *query*.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Example: rank code documents against a query with the raw transformers API
# (tokenize -> encode -> last-token pool -> L2-normalize -> similarity).

# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True allows custom code from the model repository to run.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

# Inputs longer than this many tokens are truncated by the tokenizer calls below.
max_length = 8192

# Only queries receive the instruction prefix; documents are encoded as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings (L2), so the matrix product below is cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# scores[i][j] is the similarity between query i and document j
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    # Highest-scoring document first
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# --- Next example (python / transformers) ---
# Each query must come with a one-sentence instruction that describes the task
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pool each sequence down to the hidden state of its last attended token.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            (batch, position, ...).
        attention_mask: Mask indexed as (batch, position); it is summed per
            row to count attended tokens, so it is expected to hold 1 for
            real tokens and 0 for padding.

    Returns:
        One vector per batch row, taken at that row's last attended position.
    """
    # If every row attends at the final position, the batch is left-padded
    # (or unpadded) and the last position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        # Right padding: index of the last real token is (attended count - 1).
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the query prefixed with its task instruction.

    The result is an ``Instruct:`` line holding *task_description*, a newline,
    then a ``Query:`` line holding *query*.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Example: rank code documents against a query with the raw transformers API
# (tokenize -> encode -> last-token pool -> L2-normalize -> similarity).

# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True allows custom code from the model repository to run.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

# Inputs longer than this many tokens are truncated by the tokenizer calls below.
max_length = 8192

# Only queries receive the instruction prefix; documents are encoded as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings (L2), so the matrix product below is cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# scores[i][j] is the similarity between query i and document j
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    # Highest-scoring document first
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# --- Next example (python / transformers) ---
# Each query must come with a one-sentence instruction that describes the task
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pool each sequence down to the hidden state of its last attended token.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            (batch, position, ...).
        attention_mask: Mask indexed as (batch, position); it is summed per
            row to count attended tokens, so it is expected to hold 1 for
            real tokens and 0 for padding.

    Returns:
        One vector per batch row, taken at that row's last attended position.
    """
    # If every row attends at the final position, the batch is left-padded
    # (or unpadded) and the last position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        # Right padding: index of the last real token is (attended count - 1).
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the query prefixed with its task instruction.

    The result is an ``Instruct:`` line holding *task_description*, a newline,
    then a ``Query:`` line holding *query*.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Example: rank code documents against a query with the raw transformers API
# (tokenize -> encode -> last-token pool -> L2-normalize -> similarity).

# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True allows custom code from the model repository to run.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

# Inputs longer than this many tokens are truncated by the tokenizer calls below.
max_length = 8192

# Only queries receive the instruction prefix; documents are encoded as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings (L2), so the matrix product below is cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# scores[i][j] is the similarity between query i and document j
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    # Highest-scoring document first
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# --- Next example (python / transformers) ---
# Each query must come with a one-sentence instruction that describes the task
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pool each sequence down to the hidden state of its last attended token.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            (batch, position, ...).
        attention_mask: Mask indexed as (batch, position); it is summed per
            row to count attended tokens, so it is expected to hold 1 for
            real tokens and 0 for padding.

    Returns:
        One vector per batch row, taken at that row's last attended position.
    """
    # If every row attends at the final position, the batch is left-padded
    # (or unpadded) and the last position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        # Right padding: index of the last real token is (attended count - 1).
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Return the query prefixed with its task instruction.

    The result is an ``Instruct:`` line holding *task_description*, a newline,
    then a ``Query:`` line holding *query*.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Example: rank code documents against a query with the raw transformers API
# (tokenize -> encode -> last-token pool -> L2-normalize -> similarity).

# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True allows custom code from the model repository to run.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

# Inputs longer than this many tokens are truncated by the tokenizer calls below.
max_length = 8192

# Only queries receive the instruction prefix; documents are encoded as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings (L2), so the matrix product below is cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# scores[i][j] is the similarity between query i and document j
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    # Highest-scoring document first
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# --- Next example (python / transformers) ---
# Each query must come with a one-sentence instruction that describes the task
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pool each sequence down to the hidden state of its last attended token.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            (batch, position, ...).
        attention_mask: Mask indexed as (batch, position); it is summed per
            row to count attended tokens, so it is expected to hold 1 for
            real tokens and 0 for padding.

    Returns:
        One vector per batch row, taken at that row's last attended position.
    """
    # If every row attends at the final position, the batch is left-padded
    # (or unpadded) and the last position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        # Right padding: index of the last real token is (attended count - 1).
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt: an ``Instruct:`` line with the task, then a ``Query:`` line."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192

# Only queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are needed for this forward pass)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings so the matrix product below is a cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    # Rank documents for this query, highest score first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page heading that extraction had fused onto the line above:
# "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at the last attended position of each sequence.

    Args:
        last_hidden_states: Tensor indexed as ``[batch, position, ...]``.
        attention_mask: Tensor indexed as ``[batch, position]``; it is summed
            along ``dim=1`` to count attended positions (assumes a 0/1 mask).

    Returns:
        One hidden-state slice per batch row.
    """
    # If every row attends to its final position, the batch is treated as
    # left-padded, so the final position is the last real token for all rows.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise pick, per row, the position (attended count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt: an ``Instruct:`` line with the task, then a ``Query:`` line."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192

# Only queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are needed for this forward pass)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings so the matrix product below is a cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    # Rank documents for this query, highest score first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page heading that extraction had fused onto the line above:
# "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at the last attended position of each sequence.

    Args:
        last_hidden_states: Tensor indexed as ``[batch, position, ...]``.
        attention_mask: Tensor indexed as ``[batch, position]``; it is summed
            along ``dim=1`` to count attended positions (assumes a 0/1 mask).

    Returns:
        One hidden-state slice per batch row.
    """
    # If every row attends to its final position, the batch is treated as
    # left-padded, so the final position is the last real token for all rows.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise pick, per row, the position (attended count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt: an ``Instruct:`` line with the task, then a ``Query:`` line."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192

# Only queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are needed for this forward pass)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings so the matrix product below is a cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    # Rank documents for this query, highest score first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page heading that extraction had fused onto the line above:
# "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at the last attended position of each sequence.

    Args:
        last_hidden_states: Tensor indexed as ``[batch, position, ...]``.
        attention_mask: Tensor indexed as ``[batch, position]``; it is summed
            along ``dim=1`` to count attended positions (assumes a 0/1 mask).

    Returns:
        One hidden-state slice per batch row.
    """
    # If every row attends to its final position, the batch is treated as
    # left-padded, so the final position is the last real token for all rows.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise pick, per row, the position (attended count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt: an ``Instruct:`` line with the task, then a ``Query:`` line."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192

# Only queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are needed for this forward pass)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings so the matrix product below is a cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    # Rank documents for this query, highest score first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page heading that extraction had fused onto the line above:
# "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at the last attended position of each sequence.

    Args:
        last_hidden_states: Tensor indexed as ``[batch, position, ...]``.
        attention_mask: Tensor indexed as ``[batch, position]``; it is summed
            along ``dim=1`` to count attended positions (assumes a 0/1 mask).

    Returns:
        One hidden-state slice per batch row.
    """
    # If every row attends to its final position, the batch is treated as
    # left-padded, so the final position is the last real token for all rows.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise pick, per row, the position (attended count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt: an ``Instruct:`` line with the task, then a ``Query:`` line."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192

# Only queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are needed for this forward pass)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings so the matrix product below is a cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    # Rank documents for this query, highest score first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page heading that extraction had fused onto the line above:
# "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at the last attended position of each sequence.

    Args:
        last_hidden_states: Tensor indexed as ``[batch, position, ...]``.
        attention_mask: Tensor indexed as ``[batch, position]``; it is summed
            along ``dim=1`` to count attended positions (assumes a 0/1 mask).

    Returns:
        One hidden-state slice per batch row.
    """
    # If every row attends to its final position, the batch is treated as
    # left-padded, so the final position is the last real token for all rows.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise pick, per row, the position (attended count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt: an ``Instruct:`` line with the task, then a ``Query:`` line."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192

# Only queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are needed for this forward pass)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings so the matrix product below is a cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    # Rank documents for this query, highest score first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page heading that extraction had fused onto the line above:
# "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at the last attended position of each sequence.

    Args:
        last_hidden_states: Tensor indexed as ``[batch, position, ...]``.
        attention_mask: Tensor indexed as ``[batch, position]``; it is summed
            along ``dim=1`` to count attended positions (assumes a 0/1 mask).

    Returns:
        One hidden-state slice per batch row.
    """
    # If every row attends to its final position, the batch is treated as
    # left-padded, so the final position is the last real token for all rows.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise pick, per row, the position (attended count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt: an ``Instruct:`` line with the task, then a ``Query:`` line."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192

# Only queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are needed for this forward pass)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings so the matrix product below is a cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    # Rank documents for this query, highest score first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page heading that extraction had fused onto the line above:
# "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at the last attended position of each sequence.

    Args:
        last_hidden_states: Tensor indexed as ``[batch, position, ...]``.
        attention_mask: Tensor indexed as ``[batch, position]``; it is summed
            along ``dim=1`` to count attended positions (assumes a 0/1 mask).

    Returns:
        One hidden-state slice per batch row.
    """
    # If every row attends to its final position, the batch is treated as
    # left-padded, so the final position is the last real token for all rows.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise pick, per row, the position (attended count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt: an ``Instruct:`` line with the task, then a ``Query:`` line."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192

# Only queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are needed for this forward pass)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings so the matrix product below is a cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    # Rank documents for this query, highest score first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page heading that extraction had fused onto the line above:
# "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at the last attended position of each sequence.

    Args:
        last_hidden_states: Tensor indexed as ``[batch, position, ...]``.
        attention_mask: Tensor indexed as ``[batch, position]``; it is summed
            along ``dim=1`` to count attended positions (assumes a 0/1 mask).

    Returns:
        One hidden-state slice per batch row.
    """
    # If every row attends to its final position, the batch is treated as
    # left-padded, so the final position is the last real token for all rows.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise pick, per row, the position (attended count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt: an ``Instruct:`` line with the task, then a ``Query:`` line."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192

# Only queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are needed for this forward pass)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings so the matrix product below is a cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    # Rank documents for this query, highest score first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page heading that extraction had fused onto the line above:
# "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at the last attended position of each sequence.

    Args:
        last_hidden_states: Tensor indexed as ``[batch, position, ...]``.
        attention_mask: Tensor indexed as ``[batch, position]``; it is summed
            along ``dim=1`` to count attended positions (assumes a 0/1 mask).

    Returns:
        One hidden-state slice per batch row.
    """
    # If every row attends to its final position, the batch is treated as
    # left-padded, so the final position is the last real token for all rows.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise pick, per row, the position (attended count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt: an ``Instruct:`` line with the task, then a ``Query:`` line."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192

# Only queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are needed for this forward pass)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings so the matrix product below is a cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    # Rank documents for this query, highest score first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page heading that extraction had fused onto the line above:
# "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at the last attended position of each sequence.

    Args:
        last_hidden_states: Tensor indexed as ``[batch, position, ...]``.
        attention_mask: Tensor indexed as ``[batch, position]``; it is summed
            along ``dim=1`` to count attended positions (assumes a 0/1 mask).

    Returns:
        One hidden-state slice per batch row.
    """
    # If every row attends to its final position, the batch is treated as
    # left-padded, so the final position is the last real token for all rows.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise pick, per row, the position (attended count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt: an ``Instruct:`` line with the task, then a ``Query:`` line."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192

# Only queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are needed for this forward pass)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings so the matrix product below is a cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    # Rank documents for this query, highest score first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page heading that extraction had fused onto the line above:
# "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at the last attended position of each sequence.

    Args:
        last_hidden_states: Tensor indexed as ``[batch, position, ...]``.
        attention_mask: Tensor indexed as ``[batch, position]``; it is summed
            along ``dim=1`` to count attended positions (assumes a 0/1 mask).

    Returns:
        One hidden-state slice per batch row.
    """
    # If every row attends to its final position, the batch is treated as
    # left-padded, so the final position is the last real token for all rows.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise pick, per row, the position (attended count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt: an ``Instruct:`` line with the task, then a ``Query:`` line."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192

# Only queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are needed for this forward pass)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings so the matrix product below is a cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    # Rank documents for this query, highest score first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page heading that extraction had fused onto the line above:
# "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at the last attended position of each sequence.

    Args:
        last_hidden_states: Tensor indexed as ``[batch, position, ...]``.
        attention_mask: Tensor indexed as ``[batch, position]``; it is summed
            along ``dim=1`` to count attended positions (assumes a 0/1 mask).

    Returns:
        One hidden-state slice per batch row.
    """
    # If every row attends to its final position, the batch is treated as
    # left-padded, so the final position is the last real token for all rows.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise pick, per row, the position (attended count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt: an ``Instruct:`` line with the task, then a ``Query:`` line."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192

# Only queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are needed for this forward pass)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings so the matrix product below is a cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    # Rank documents for this query, highest score first.
    doc_score_pairs = sorted(zip(documents, query_scores), key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page heading that extraction had fused onto the line above:
# "Each query must come with a one-sentence instruction that describes the task" (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at the last attended position of each sequence.

    Args:
        last_hidden_states: Tensor indexed as ``[batch, position, ...]``.
        attention_mask: Tensor indexed as ``[batch, position]``; it is summed
            along ``dim=1`` to count attended positions (assumes a 0/1 mask).

    Returns:
        One hidden-state slice per batch row.
    """
    # If every row attends to its final position, the batch is treated as
    # left-padded, so the final position is the last real token for all rows.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise pick, per row, the position (attended count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt text for a query: an 'Instruct:' line, then a 'Query:' line."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True allows code shipped with the checkpoint to run
# locally; only use it with repositories you trust.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows of `scores` follow `queries`, columns follow `documents`.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task | python | transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each sequence, the hidden state at its last attended position.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...]
            (presumably (batch, seq_len, hidden) -- confirm against the model).
        attention_mask: Mask indexed as [batch, position]; it is summed per
            row, so attended positions are assumed to be 1 and padding 0.

    Returns:
        One pooled entry per batch row.
    """
    # If every row's final mask entry is set, the batch is treated as
    # left-padded, so the final position holds each row's last token.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise pick, per row, the position (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt text for a query: an 'Instruct:' line, then a 'Query:' line."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True allows code shipped with the checkpoint to run
# locally; only use it with repositories you trust.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows of `scores` follow `queries`, columns follow `documents`.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task | python | transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each sequence, the hidden state at its last attended position.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...]
            (presumably (batch, seq_len, hidden) -- confirm against the model).
        attention_mask: Mask indexed as [batch, position]; it is summed per
            row, so attended positions are assumed to be 1 and padding 0.

    Returns:
        One pooled entry per batch row.
    """
    # If every row's final mask entry is set, the batch is treated as
    # left-padded, so the final position holds each row's last token.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise pick, per row, the position (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt text for a query: an 'Instruct:' line, then a 'Query:' line."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True allows code shipped with the checkpoint to run
# locally; only use it with repositories you trust.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows of `scores` follow `queries`, columns follow `documents`.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task | python | transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each sequence, the hidden state at its last attended position.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...]
            (presumably (batch, seq_len, hidden) -- confirm against the model).
        attention_mask: Mask indexed as [batch, position]; it is summed per
            row, so attended positions are assumed to be 1 and padding 0.

    Returns:
        One pooled entry per batch row.
    """
    # If every row's final mask entry is set, the batch is treated as
    # left-padded, so the final position holds each row's last token.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise pick, per row, the position (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt text for a query: an 'Instruct:' line, then a 'Query:' line."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True allows code shipped with the checkpoint to run
# locally; only use it with repositories you trust.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows of `scores` follow `queries`, columns follow `documents`.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task | python | transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each sequence, the hidden state at its last attended position.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...]
            (presumably (batch, seq_len, hidden) -- confirm against the model).
        attention_mask: Mask indexed as [batch, position]; it is summed per
            row, so attended positions are assumed to be 1 and padding 0.

    Returns:
        One pooled entry per batch row.
    """
    # If every row's final mask entry is set, the batch is treated as
    # left-padded, so the final position holds each row's last token.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise pick, per row, the position (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt text for a query: an 'Instruct:' line, then a 'Query:' line."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True allows code shipped with the checkpoint to run
# locally; only use it with repositories you trust.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows of `scores` follow `queries`, columns follow `documents`.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task | python | transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each sequence, the hidden state at its last attended position.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...]
            (presumably (batch, seq_len, hidden) -- confirm against the model).
        attention_mask: Mask indexed as [batch, position]; it is summed per
            row, so attended positions are assumed to be 1 and padding 0.

    Returns:
        One pooled entry per batch row.
    """
    # If every row's final mask entry is set, the batch is treated as
    # left-padded, so the final position holds each row's last token.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise pick, per row, the position (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt text for a query: an 'Instruct:' line, then a 'Query:' line."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True allows code shipped with the checkpoint to run
# locally; only use it with repositories you trust.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows of `scores` follow `queries`, columns follow `documents`.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task | python | transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each sequence, the hidden state at its last attended position.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...]
            (presumably (batch, seq_len, hidden) -- confirm against the model).
        attention_mask: Mask indexed as [batch, position]; it is summed per
            row, so attended positions are assumed to be 1 and padding 0.

    Returns:
        One pooled entry per batch row.
    """
    # If every row's final mask entry is set, the batch is treated as
    # left-padded, so the final position holds each row's last token.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise pick, per row, the position (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt text for a query: an 'Instruct:' line, then a 'Query:' line."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True allows code shipped with the checkpoint to run
# locally; only use it with repositories you trust.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows of `scores` follow `queries`, columns follow `documents`.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task | python | transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each sequence, the hidden state at its last attended position.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...]
            (presumably (batch, seq_len, hidden) -- confirm against the model).
        attention_mask: Mask indexed as [batch, position]; it is summed per
            row, so attended positions are assumed to be 1 and padding 0.

    Returns:
        One pooled entry per batch row.
    """
    # If every row's final mask entry is set, the batch is treated as
    # left-padded, so the final position holds each row's last token.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise pick, per row, the position (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt text for a query: an 'Instruct:' line, then a 'Query:' line."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True allows code shipped with the checkpoint to run
# locally; only use it with repositories you trust.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows of `scores` follow `queries`, columns follow `documents`.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task | python | transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each sequence, the hidden state at its last attended position.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...]
            (presumably (batch, seq_len, hidden) -- confirm against the model).
        attention_mask: Mask indexed as [batch, position]; it is summed per
            row, so attended positions are assumed to be 1 and padding 0.

    Returns:
        One pooled entry per batch row.
    """
    # If every row's final mask entry is set, the batch is treated as
    # left-padded, so the final position holds each row's last token.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise pick, per row, the position (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt text for a query: an 'Instruct:' line, then a 'Query:' line."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True allows code shipped with the checkpoint to run
# locally; only use it with repositories you trust.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows of `scores` follow `queries`, columns follow `documents`.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task | python | transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each sequence, the hidden state at its last attended position.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...]
            (presumably (batch, seq_len, hidden) -- confirm against the model).
        attention_mask: Mask indexed as [batch, position]; it is summed per
            row, so attended positions are assumed to be 1 and padding 0.

    Returns:
        One pooled entry per batch row.
    """
    # If every row's final mask entry is set, the batch is treated as
    # left-padded, so the final position holds each row's last token.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise pick, per row, the position (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt text for a query: an 'Instruct:' line, then a 'Query:' line."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True allows code shipped with the checkpoint to run
# locally; only use it with repositories you trust.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows of `scores` follow `queries`, columns follow `documents`.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task | python | transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each sequence, the hidden state at its last attended position.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...]
            (presumably (batch, seq_len, hidden) -- confirm against the model).
        attention_mask: Mask indexed as [batch, position]; it is summed per
            row, so attended positions are assumed to be 1 and padding 0.

    Returns:
        One pooled entry per batch row.
    """
    # If every row's final mask entry is set, the batch is treated as
    # left-padded, so the final position holds each row's last token.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise pick, per row, the position (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt text for a query: an 'Instruct:' line, then a 'Query:' line."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True allows code shipped with the checkpoint to run
# locally; only use it with repositories you trust.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows of `scores` follow `queries`, columns follow `documents`.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task | python | transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each sequence, the hidden state at its last attended position.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...]
            (presumably (batch, seq_len, hidden) -- confirm against the model).
        attention_mask: Mask indexed as [batch, position]; it is summed per
            row, so attended positions are assumed to be 1 and padding 0.

    Returns:
        One pooled entry per batch row.
    """
    # If every row's final mask entry is set, the batch is treated as
    # left-padded, so the final position holds each row's last token.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise pick, per row, the position (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt text for a query: an 'Instruct:' line, then a 'Query:' line."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True allows code shipped with the checkpoint to run
# locally; only use it with repositories you trust.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows of `scores` follow `queries`, columns follow `documents`.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task | python | transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each sequence, the hidden state at its last attended position.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...]
            (presumably (batch, seq_len, hidden) -- confirm against the model).
        attention_mask: Mask indexed as [batch, position]; it is summed per
            row, so attended positions are assumed to be 1 and padding 0.

    Returns:
        One pooled entry per batch row.
    """
    # If every row's final mask entry is set, the batch is treated as
    # left-padded, so the final position holds each row's last token.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise pick, per row, the position (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt text for a query: an 'Instruct:' line, then a 'Query:' line."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True allows code shipped with the checkpoint to run
# locally; only use it with repositories you trust.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows of `scores` follow `queries`, columns follow `documents`.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))
for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task | python | transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each sequence, the hidden state at its last attended position.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...]
            (presumably (batch, seq_len, hidden) -- confirm against the model).
        attention_mask: Mask indexed as [batch, position]; it is summed per
            row, so attended positions are assumed to be 1 and padding 0.

    Returns:
        One pooled entry per batch row.
    """
    # If every row's final mask entry is set, the batch is treated as
    # left-padded, so the final position holds each row's last token.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise pick, per row, the position (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the query prompt: an ``Instruct:`` line followed by a ``Query:`` line.

    Args:
        task_description: Instruction text placed after ``Instruct: ``.
        query: Query text placed after ``Query: ``.

    Returns:
        The two parts joined by a single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True allows code shipped in the model repository to run locally.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are L2-normalized, so this matrix product gives cosine similarities.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page caption that was fused onto the line above in the scrape:
# Each query must come with a one-sentence instruction that describes the task | python | transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select, for every batch row, the hidden state of its last attended token.

    Args:
        last_hidden_states: Hidden states, indexed as ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``. It is summed, so
            attended positions are presumably 1 and padding 0 (confirm with caller).

    Returns:
        One pooled hidden state per batch row.
    """
    # If every row attends to the final position, the batch is treated as
    # left-padded: the final position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise the last real token of each row sits at (attended count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the query prompt: an ``Instruct:`` line followed by a ``Query:`` line.

    Args:
        task_description: Instruction text placed after ``Instruct: ``.
        query: Query text placed after ``Query: ``.

    Returns:
        The two parts joined by a single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True allows code shipped in the model repository to run locally.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are L2-normalized, so this matrix product gives cosine similarities.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page caption that was fused onto the line above in the scrape:
# Each query must come with a one-sentence instruction that describes the task | python | transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select, for every batch row, the hidden state of its last attended token.

    Args:
        last_hidden_states: Hidden states, indexed as ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``. It is summed, so
            attended positions are presumably 1 and padding 0 (confirm with caller).

    Returns:
        One pooled hidden state per batch row.
    """
    # If every row attends to the final position, the batch is treated as
    # left-padded: the final position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise the last real token of each row sits at (attended count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the query prompt: an ``Instruct:`` line followed by a ``Query:`` line.

    Args:
        task_description: Instruction text placed after ``Instruct: ``.
        query: Query text placed after ``Query: ``.

    Returns:
        The two parts joined by a single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True allows code shipped in the model repository to run locally.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are L2-normalized, so this matrix product gives cosine similarities.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page caption that was fused onto the line above in the scrape:
# Each query must come with a one-sentence instruction that describes the task | python | transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select, for every batch row, the hidden state of its last attended token.

    Args:
        last_hidden_states: Hidden states, indexed as ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``. It is summed, so
            attended positions are presumably 1 and padding 0 (confirm with caller).

    Returns:
        One pooled hidden state per batch row.
    """
    # If every row attends to the final position, the batch is treated as
    # left-padded: the final position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise the last real token of each row sits at (attended count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the query prompt: an ``Instruct:`` line followed by a ``Query:`` line.

    Args:
        task_description: Instruction text placed after ``Instruct: ``.
        query: Query text placed after ``Query: ``.

    Returns:
        The two parts joined by a single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True allows code shipped in the model repository to run locally.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are L2-normalized, so this matrix product gives cosine similarities.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page caption that was fused onto the line above in the scrape:
# Each query must come with a one-sentence instruction that describes the task | python | transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select, for every batch row, the hidden state of its last attended token.

    Args:
        last_hidden_states: Hidden states, indexed as ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``. It is summed, so
            attended positions are presumably 1 and padding 0 (confirm with caller).

    Returns:
        One pooled hidden state per batch row.
    """
    # If every row attends to the final position, the batch is treated as
    # left-padded: the final position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise the last real token of each row sits at (attended count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the query prompt: an ``Instruct:`` line followed by a ``Query:`` line.

    Args:
        task_description: Instruction text placed after ``Instruct: ``.
        query: Query text placed after ``Query: ``.

    Returns:
        The two parts joined by a single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True allows code shipped in the model repository to run locally.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are L2-normalized, so this matrix product gives cosine similarities.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page caption that was fused onto the line above in the scrape:
# Each query must come with a one-sentence instruction that describes the task | python | transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select, for every batch row, the hidden state of its last attended token.

    Args:
        last_hidden_states: Hidden states, indexed as ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``. It is summed, so
            attended positions are presumably 1 and padding 0 (confirm with caller).

    Returns:
        One pooled hidden state per batch row.
    """
    # If every row attends to the final position, the batch is treated as
    # left-padded: the final position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise the last real token of each row sits at (attended count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the query prompt: an ``Instruct:`` line followed by a ``Query:`` line.

    Args:
        task_description: Instruction text placed after ``Instruct: ``.
        query: Query text placed after ``Query: ``.

    Returns:
        The two parts joined by a single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True allows code shipped in the model repository to run locally.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are L2-normalized, so this matrix product gives cosine similarities.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page caption that was fused onto the line above in the scrape:
# Each query must come with a one-sentence instruction that describes the task | python | transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select, for every batch row, the hidden state of its last attended token.

    Args:
        last_hidden_states: Hidden states, indexed as ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``. It is summed, so
            attended positions are presumably 1 and padding 0 (confirm with caller).

    Returns:
        One pooled hidden state per batch row.
    """
    # If every row attends to the final position, the batch is treated as
    # left-padded: the final position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise the last real token of each row sits at (attended count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the query prompt: an ``Instruct:`` line followed by a ``Query:`` line.

    Args:
        task_description: Instruction text placed after ``Instruct: ``.
        query: Query text placed after ``Query: ``.

    Returns:
        The two parts joined by a single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True allows code shipped in the model repository to run locally.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are L2-normalized, so this matrix product gives cosine similarities.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page caption that was fused onto the line above in the scrape:
# Each query must come with a one-sentence instruction that describes the task | python | transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select, for every batch row, the hidden state of its last attended token.

    Args:
        last_hidden_states: Hidden states, indexed as ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``. It is summed, so
            attended positions are presumably 1 and padding 0 (confirm with caller).

    Returns:
        One pooled hidden state per batch row.
    """
    # If every row attends to the final position, the batch is treated as
    # left-padded: the final position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise the last real token of each row sits at (attended count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the query prompt: an ``Instruct:`` line followed by a ``Query:`` line.

    Args:
        task_description: Instruction text placed after ``Instruct: ``.
        query: Query text placed after ``Query: ``.

    Returns:
        The two parts joined by a single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True allows code shipped in the model repository to run locally.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are L2-normalized, so this matrix product gives cosine similarities.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page caption that was fused onto the line above in the scrape:
# Each query must come with a one-sentence instruction that describes the task | python | transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select, for every batch row, the hidden state of its last attended token.

    Args:
        last_hidden_states: Hidden states, indexed as ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``. It is summed, so
            attended positions are presumably 1 and padding 0 (confirm with caller).

    Returns:
        One pooled hidden state per batch row.
    """
    # If every row attends to the final position, the batch is treated as
    # left-padded: the final position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise the last real token of each row sits at (attended count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the query prompt: an ``Instruct:`` line followed by a ``Query:`` line.

    Args:
        task_description: Instruction text placed after ``Instruct: ``.
        query: Query text placed after ``Query: ``.

    Returns:
        The two parts joined by a single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True allows code shipped in the model repository to run locally.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are L2-normalized, so this matrix product gives cosine similarities.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page caption that was fused onto the line above in the scrape:
# Each query must come with a one-sentence instruction that describes the task | python | transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select, for every batch row, the hidden state of its last attended token.

    Args:
        last_hidden_states: Hidden states, indexed as ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``. It is summed, so
            attended positions are presumably 1 and padding 0 (confirm with caller).

    Returns:
        One pooled hidden state per batch row.
    """
    # If every row attends to the final position, the batch is treated as
    # left-padded: the final position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise the last real token of each row sits at (attended count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the query prompt: an ``Instruct:`` line followed by a ``Query:`` line.

    Args:
        task_description: Instruction text placed after ``Instruct: ``.
        query: Query text placed after ``Query: ``.

    Returns:
        The two parts joined by a single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True allows code shipped in the model repository to run locally.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are L2-normalized, so this matrix product gives cosine similarities.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page caption that was fused onto the line above in the scrape:
# Each query must come with a one-sentence instruction that describes the task | python | transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select, for every batch row, the hidden state of its last attended token.

    Args:
        last_hidden_states: Hidden states, indexed as ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``. It is summed, so
            attended positions are presumably 1 and padding 0 (confirm with caller).

    Returns:
        One pooled hidden state per batch row.
    """
    # If every row attends to the final position, the batch is treated as
    # left-padded: the final position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise the last real token of each row sits at (attended count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the query prompt: an ``Instruct:`` line followed by a ``Query:`` line.

    Args:
        task_description: Instruction text placed after ``Instruct: ``.
        query: Query text placed after ``Query: ``.

    Returns:
        The two parts joined by a single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True allows code shipped in the model repository to run locally.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are L2-normalized, so this matrix product gives cosine similarities.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page caption that was fused onto the line above in the scrape:
# Each query must come with a one-sentence instruction that describes the task | python | transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select, for every batch row, the hidden state of its last attended token.

    Args:
        last_hidden_states: Hidden states, indexed as ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``. It is summed, so
            attended positions are presumably 1 and padding 0 (confirm with caller).

    Returns:
        One pooled hidden state per batch row.
    """
    # If every row attends to the final position, the batch is treated as
    # left-padded: the final position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise the last real token of each row sits at (attended count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the query prompt: an ``Instruct:`` line followed by a ``Query:`` line.

    Args:
        task_description: Instruction text placed after ``Instruct: ``.
        query: Query text placed after ``Query: ``.

    Returns:
        The two parts joined by a single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True allows code shipped in the model repository to run locally.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are L2-normalized, so this matrix product gives cosine similarities.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page caption that was fused onto the line above in the scrape:
# Each query must come with a one-sentence instruction that describes the task | python | transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select, for every batch row, the hidden state of its last attended token.

    Args:
        last_hidden_states: Hidden states, indexed as ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``. It is summed, so
            attended positions are presumably 1 and padding 0 (confirm with caller).

    Returns:
        One pooled hidden state per batch row.
    """
    # If every row attends to the final position, the batch is treated as
    # left-padded: the final position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise the last real token of each row sits at (attended count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the query prompt: an ``Instruct:`` line followed by a ``Query:`` line.

    Args:
        task_description: Instruction text placed after ``Instruct: ``.
        query: Query text placed after ``Query: ``.

    Returns:
        The two parts joined by a single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# NOTE: trust_remote_code=True allows code shipped in the model repository to run locally.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are needed for inference)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are L2-normalized, so this matrix product gives cosine similarities.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)
# Page caption that was fused onto the line above in the scrape:
# Each query must come with a one-sentence instruction that describes the task | python | transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select, for every batch row, the hidden state of its last attended token.

    Args:
        last_hidden_states: Hidden states, indexed as ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``. It is summed, so
            attended positions are presumably 1 and padding 0 (confirm with caller).

    Returns:
        One pooled hidden state per batch row.
    """
    # If every row attends to the final position, the batch is treated as
    # left-padded: the final position is the last real token for all rows.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise the last real token of each row sits at (attended count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the instruction-prefixed text for a query.

    Args:
        task_description: One-sentence description of the retrieval task.
        query: The raw query text.

    Returns:
        ``Instruct: <task_description>`` and ``Query: <query>`` joined by a
        single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are tracked inside this block)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Both sides are L2-normalized, so this product is the cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python / transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last attended token of each sequence.

    Args:
        last_hidden_states: Hidden states indexed as (batch, position, ...).
        attention_mask: Mask indexed as (batch, position); its per-row sum
            is used as the sequence length.

    Returns:
        One hidden-state vector per batch row.
    """
    # If every row attends to the final position, the batch is treated as
    # left-padded: the final position holds the last real token everywhere.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise select, per row, the position (attended-token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_indices = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_indices, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the instruction-prefixed text for a query.

    Args:
        task_description: One-sentence description of the retrieval task.
        query: The raw query text.

    Returns:
        ``Instruct: <task_description>`` and ``Query: <query>`` joined by a
        single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are tracked inside this block)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Both sides are L2-normalized, so this product is the cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python / transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last attended token of each sequence.

    Args:
        last_hidden_states: Hidden states indexed as (batch, position, ...).
        attention_mask: Mask indexed as (batch, position); its per-row sum
            is used as the sequence length.

    Returns:
        One hidden-state vector per batch row.
    """
    # If every row attends to the final position, the batch is treated as
    # left-padded: the final position holds the last real token everywhere.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise select, per row, the position (attended-token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_indices = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_indices, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the instruction-prefixed text for a query.

    Args:
        task_description: One-sentence description of the retrieval task.
        query: The raw query text.

    Returns:
        ``Instruct: <task_description>`` and ``Query: <query>`` joined by a
        single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are tracked inside this block)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Both sides are L2-normalized, so this product is the cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python / transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last attended token of each sequence.

    Args:
        last_hidden_states: Hidden states indexed as (batch, position, ...).
        attention_mask: Mask indexed as (batch, position); its per-row sum
            is used as the sequence length.

    Returns:
        One hidden-state vector per batch row.
    """
    # If every row attends to the final position, the batch is treated as
    # left-padded: the final position holds the last real token everywhere.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise select, per row, the position (attended-token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_indices = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_indices, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the instruction-prefixed text for a query.

    Args:
        task_description: One-sentence description of the retrieval task.
        query: The raw query text.

    Returns:
        ``Instruct: <task_description>`` and ``Query: <query>`` joined by a
        single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are tracked inside this block)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Both sides are L2-normalized, so this product is the cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python / transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last attended token of each sequence.

    Args:
        last_hidden_states: Hidden states indexed as (batch, position, ...).
        attention_mask: Mask indexed as (batch, position); its per-row sum
            is used as the sequence length.

    Returns:
        One hidden-state vector per batch row.
    """
    # If every row attends to the final position, the batch is treated as
    # left-padded: the final position holds the last real token everywhere.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise select, per row, the position (attended-token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_indices = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_indices, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the instruction-prefixed text for a query.

    Args:
        task_description: One-sentence description of the retrieval task.
        query: The raw query text.

    Returns:
        ``Instruct: <task_description>`` and ``Query: <query>`` joined by a
        single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are tracked inside this block)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Both sides are L2-normalized, so this product is the cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python / transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last attended token of each sequence.

    Args:
        last_hidden_states: Hidden states indexed as (batch, position, ...).
        attention_mask: Mask indexed as (batch, position); its per-row sum
            is used as the sequence length.

    Returns:
        One hidden-state vector per batch row.
    """
    # If every row attends to the final position, the batch is treated as
    # left-padded: the final position holds the last real token everywhere.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise select, per row, the position (attended-token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_indices = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_indices, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the instruction-prefixed text for a query.

    Args:
        task_description: One-sentence description of the retrieval task.
        query: The raw query text.

    Returns:
        ``Instruct: <task_description>`` and ``Query: <query>`` joined by a
        single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are tracked inside this block)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Both sides are L2-normalized, so this product is the cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python / transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last attended token of each sequence.

    Args:
        last_hidden_states: Hidden states indexed as (batch, position, ...).
        attention_mask: Mask indexed as (batch, position); its per-row sum
            is used as the sequence length.

    Returns:
        One hidden-state vector per batch row.
    """
    # If every row attends to the final position, the batch is treated as
    # left-padded: the final position holds the last real token everywhere.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise select, per row, the position (attended-token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_indices = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_indices, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the instruction-prefixed text for a query.

    Args:
        task_description: One-sentence description of the retrieval task.
        query: The raw query text.

    Returns:
        ``Instruct: <task_description>`` and ``Query: <query>`` joined by a
        single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are tracked inside this block)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Both sides are L2-normalized, so this product is the cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python / transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last attended token of each sequence.

    Args:
        last_hidden_states: Hidden states indexed as (batch, position, ...).
        attention_mask: Mask indexed as (batch, position); its per-row sum
            is used as the sequence length.

    Returns:
        One hidden-state vector per batch row.
    """
    # If every row attends to the final position, the batch is treated as
    # left-padded: the final position holds the last real token everywhere.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise select, per row, the position (attended-token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_indices = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_indices, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the instruction-prefixed text for a query.

    Args:
        task_description: One-sentence description of the retrieval task.
        query: The raw query text.

    Returns:
        ``Instruct: <task_description>`` and ``Query: <query>`` joined by a
        single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are tracked inside this block)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Both sides are L2-normalized, so this product is the cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python / transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last attended token of each sequence.

    Args:
        last_hidden_states: Hidden states indexed as (batch, position, ...).
        attention_mask: Mask indexed as (batch, position); its per-row sum
            is used as the sequence length.

    Returns:
        One hidden-state vector per batch row.
    """
    # If every row attends to the final position, the batch is treated as
    # left-padded: the final position holds the last real token everywhere.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise select, per row, the position (attended-token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_indices = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_indices, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the instruction-prefixed text for a query.

    Args:
        task_description: One-sentence description of the retrieval task.
        query: The raw query text.

    Returns:
        ``Instruct: <task_description>`` and ``Query: <query>`` joined by a
        single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are tracked inside this block)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Both sides are L2-normalized, so this product is the cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python / transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last attended token of each sequence.

    Args:
        last_hidden_states: Hidden states indexed as (batch, position, ...).
        attention_mask: Mask indexed as (batch, position); its per-row sum
            is used as the sequence length.

    Returns:
        One hidden-state vector per batch row.
    """
    # If every row attends to the final position, the batch is treated as
    # left-padded: the final position holds the last real token everywhere.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise select, per row, the position (attended-token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_indices = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_indices, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the instruction-prefixed text for a query.

    Args:
        task_description: One-sentence description of the retrieval task.
        query: The raw query text.

    Returns:
        ``Instruct: <task_description>`` and ``Query: <query>`` joined by a
        single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are tracked inside this block)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Both sides are L2-normalized, so this product is the cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python / transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last attended token of each sequence.

    Args:
        last_hidden_states: Hidden states indexed as (batch, position, ...).
        attention_mask: Mask indexed as (batch, position); its per-row sum
            is used as the sequence length.

    Returns:
        One hidden-state vector per batch row.
    """
    # If every row attends to the final position, the batch is treated as
    # left-padded: the final position holds the last real token everywhere.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise select, per row, the position (attended-token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_indices = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_indices, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the instruction-prefixed text for a query.

    Args:
        task_description: One-sentence description of the retrieval task.
        query: The raw query text.

    Returns:
        ``Instruct: <task_description>`` and ``Query: <query>`` joined by a
        single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are tracked inside this block)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Both sides are L2-normalized, so this product is the cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python / transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last attended token of each sequence.

    Args:
        last_hidden_states: Hidden states indexed as (batch, position, ...).
        attention_mask: Mask indexed as (batch, position); its per-row sum
            is used as the sequence length.

    Returns:
        One hidden-state vector per batch row.
    """
    # If every row attends to the final position, the batch is treated as
    # left-padded: the final position holds the last real token everywhere.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise select, per row, the position (attended-token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_indices = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_indices, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the instruction-prefixed text for a query.

    Args:
        task_description: One-sentence description of the retrieval task.
        query: The raw query text.

    Returns:
        ``Instruct: <task_description>`` and ``Query: <query>`` joined by a
        single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are tracked inside this block)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Both sides are L2-normalized, so this product is the cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python / transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last attended token of each sequence.

    Args:
        last_hidden_states: Hidden states indexed as (batch, position, ...).
        attention_mask: Mask indexed as (batch, position); its per-row sum
            is used as the sequence length.

    Returns:
        One hidden-state vector per batch row.
    """
    # If every row attends to the final position, the batch is treated as
    # left-padded: the final position holds the last real token everywhere.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise select, per row, the position (attended-token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_indices = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_indices, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the instruction-prefixed text for a query.

    Args:
        task_description: One-sentence description of the retrieval task.
        query: The raw query text.

    Returns:
        ``Instruct: <task_description>`` and ``Query: <query>`` joined by a
        single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings (no gradients are tracked inside this block)
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Both sides are L2-normalized, so this product is the cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python / transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last attended token of each sequence.

    Args:
        last_hidden_states: Hidden states indexed as (batch, position, ...).
        attention_mask: Mask indexed as (batch, position); its per-row sum
            is used as the sequence length.

    Returns:
        One hidden-state vector per batch row.
    """
    # If every row attends to the final position, the batch is treated as
    # left-padded: the final position holds the last real token everywhere.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise select, per row, the position (attended-token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_indices = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_indices, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in the 'Instruct/Query' layout."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()  # switch to evaluation mode for inference

max_length = 8192

# Only the queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings; with unit-length rows the dot product below is cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# --- Each query must come with a one-sentence instruction that describes the task (python, transformers) ---
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last non-padding token of each sequence.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            (batch, position, ...).
        attention_mask: Mask indexed as (batch, position); real tokens are
            expected to be 1 and padding 0.

    Returns:
        One vector per batch row, taken at that row's last real token.
    """
    # If every row has a real token in the final position, the batch is
    # left-padded (or unpadded), so the final position is the last token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]

    # Right padding: the last real token sits at index (real-token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in the 'Instruct/Query' layout."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()  # switch to evaluation mode for inference

max_length = 8192

# Only the queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings; with unit-length rows the dot product below is cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# --- Each query must come with a one-sentence instruction that describes the task (python, transformers) ---
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last non-padding token of each sequence.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            (batch, position, ...).
        attention_mask: Mask indexed as (batch, position); real tokens are
            expected to be 1 and padding 0.

    Returns:
        One vector per batch row, taken at that row's last real token.
    """
    # If every row has a real token in the final position, the batch is
    # left-padded (or unpadded), so the final position is the last token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]

    # Right padding: the last real token sits at index (real-token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in the 'Instruct/Query' layout."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()  # switch to evaluation mode for inference

max_length = 8192

# Only the queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings; with unit-length rows the dot product below is cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# --- Each query must come with a one-sentence instruction that describes the task (python, transformers) ---
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last non-padding token of each sequence.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            (batch, position, ...).
        attention_mask: Mask indexed as (batch, position); real tokens are
            expected to be 1 and padding 0.

    Returns:
        One vector per batch row, taken at that row's last real token.
    """
    # If every row has a real token in the final position, the batch is
    # left-padded (or unpadded), so the final position is the last token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]

    # Right padding: the last real token sits at index (real-token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in the 'Instruct/Query' layout."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()  # switch to evaluation mode for inference

max_length = 8192

# Only the queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings; with unit-length rows the dot product below is cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# --- Each query must come with a one-sentence instruction that describes the task (python, transformers) ---
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last non-padding token of each sequence.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            (batch, position, ...).
        attention_mask: Mask indexed as (batch, position); real tokens are
            expected to be 1 and padding 0.

    Returns:
        One vector per batch row, taken at that row's last real token.
    """
    # If every row has a real token in the final position, the batch is
    # left-padded (or unpadded), so the final position is the last token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]

    # Right padding: the last real token sits at index (real-token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in the 'Instruct/Query' layout."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()  # switch to evaluation mode for inference

max_length = 8192

# Only the queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings; with unit-length rows the dot product below is cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# --- Each query must come with a one-sentence instruction that describes the task (python, transformers) ---
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last non-padding token of each sequence.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            (batch, position, ...).
        attention_mask: Mask indexed as (batch, position); real tokens are
            expected to be 1 and padding 0.

    Returns:
        One vector per batch row, taken at that row's last real token.
    """
    # If every row has a real token in the final position, the batch is
    # left-padded (or unpadded), so the final position is the last token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]

    # Right padding: the last real token sits at index (real-token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in the 'Instruct/Query' layout."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()  # switch to evaluation mode for inference

max_length = 8192

# Only the queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings; with unit-length rows the dot product below is cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# --- Each query must come with a one-sentence instruction that describes the task (python, transformers) ---
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last non-padding token of each sequence.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            (batch, position, ...).
        attention_mask: Mask indexed as (batch, position); real tokens are
            expected to be 1 and padding 0.

    Returns:
        One vector per batch row, taken at that row's last real token.
    """
    # If every row has a real token in the final position, the batch is
    # left-padded (or unpadded), so the final position is the last token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]

    # Right padding: the last real token sits at index (real-token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in the 'Instruct/Query' layout."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()  # switch to evaluation mode for inference

max_length = 8192

# Only the queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings; with unit-length rows the dot product below is cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# --- Each query must come with a one-sentence instruction that describes the task (python, transformers) ---
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last non-padding token of each sequence.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            (batch, position, ...).
        attention_mask: Mask indexed as (batch, position); real tokens are
            expected to be 1 and padding 0.

    Returns:
        One vector per batch row, taken at that row's last real token.
    """
    # If every row has a real token in the final position, the batch is
    # left-padded (or unpadded), so the final position is the last token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]

    # Right padding: the last real token sits at index (real-token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in the 'Instruct/Query' layout."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()  # switch to evaluation mode for inference

max_length = 8192

# Only the queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings; with unit-length rows the dot product below is cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# --- Each query must come with a one-sentence instruction that describes the task (python, transformers) ---
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last non-padding token of each sequence.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            (batch, position, ...).
        attention_mask: Mask indexed as (batch, position); real tokens are
            expected to be 1 and padding 0.

    Returns:
        One vector per batch row, taken at that row's last real token.
    """
    # If every row has a real token in the final position, the batch is
    # left-padded (or unpadded), so the final position is the last token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]

    # Right padding: the last real token sits at index (real-token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in the 'Instruct/Query' layout."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()  # switch to evaluation mode for inference

max_length = 8192

# Only the queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings; with unit-length rows the dot product below is cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# --- Each query must come with a one-sentence instruction that describes the task (python, transformers) ---
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last non-padding token of each sequence.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            (batch, position, ...).
        attention_mask: Mask indexed as (batch, position); real tokens are
            expected to be 1 and padding 0.

    Returns:
        One vector per batch row, taken at that row's last real token.
    """
    # If every row has a real token in the final position, the batch is
    # left-padded (or unpadded), so the final position is the last token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]

    # Right padding: the last real token sits at index (real-token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in the 'Instruct/Query' layout."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()  # switch to evaluation mode for inference

max_length = 8192

# Only the queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings; with unit-length rows the dot product below is cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# --- Each query must come with a one-sentence instruction that describes the task (python, transformers) ---
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last non-padding token of each sequence.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            (batch, position, ...).
        attention_mask: Mask indexed as (batch, position); real tokens are
            expected to be 1 and padding 0.

    Returns:
        One vector per batch row, taken at that row's last real token.
    """
    # If every row has a real token in the final position, the batch is
    # left-padded (or unpadded), so the final position is the last token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]

    # Right padding: the last real token sits at index (real-token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in the 'Instruct/Query' layout."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()  # switch to evaluation mode for inference

max_length = 8192

# Only the queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings; with unit-length rows the dot product below is cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# --- Each query must come with a one-sentence instruction that describes the task (python, transformers) ---
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last non-padding token of each sequence.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            (batch, position, ...).
        attention_mask: Mask indexed as (batch, position); real tokens are
            expected to be 1 and padding 0.

    Returns:
        One vector per batch row, taken at that row's last real token.
    """
    # If every row has a real token in the final position, the batch is
    # left-padded (or unpadded), so the final position is the last token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]

    # Right padding: the last real token sits at index (real-token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in the 'Instruct/Query' layout."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()  # switch to evaluation mode for inference

max_length = 8192

# Only the queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings; with unit-length rows the dot product below is cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# --- Each query must come with a one-sentence instruction that describes the task (python, transformers) ---
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last non-padding token of each sequence.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            (batch, position, ...).
        attention_mask: Mask indexed as (batch, position); real tokens are
            expected to be 1 and padding 0.

    Returns:
        One vector per batch row, taken at that row's last real token.
    """
    # If every row has a real token in the final position, the batch is
    # left-padded (or unpadded), so the final position is the last token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]

    # Right padding: the last real token sits at index (real-token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction in the 'Instruct/Query' layout."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()  # switch to evaluation mode for inference

max_length = 8192

# Only the queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings; with unit-length rows the dot product below is cosine similarity
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# --- Each query must come with a one-sentence instruction that describes the task (python, transformers) ---
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state of the last non-padding token of each sequence.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            (batch, position, ...).
        attention_mask: Mask indexed as (batch, position); real tokens are
            expected to be 1 and padding 0.

    Returns:
        One vector per batch row, taken at that row's last real token.
    """
    # If every row has a real token in the final position, the batch is
    # left-padded (or unpadded), so the final position is the last token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]

    # Right padding: the last real token sits at index (real-token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix a query with its task instruction.

    Returns a two-line string: ``Instruct: <task_description>`` followed by
    ``Query: <query>``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192

# Queries carry the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
# Rows are unit-length after normalization, so the product holds cosine similarities.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select each sequence's hidden state at its last attended position.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...].
        attention_mask: Mask indexed as [batch, position]; assumed to hold 1
            for real tokens and 0 for padding.

    Returns:
        One hidden-state slice per batch row.
    """
    # If every row attends at the final position, the batch is left-padded
    # (or unpadded), so the final position is the last real token everywhere.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token of each row sits at (mask sum - 1).
    # Cast to long so float masks still produce valid indices.
    sequence_lengths = attention_mask.sum(dim=1).long() - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix a query with its task instruction.

    Returns a two-line string: ``Instruct: <task_description>`` followed by
    ``Query: <query>``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192

# Queries carry the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
# Rows are unit-length after normalization, so the product holds cosine similarities.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select each sequence's hidden state at its last attended position.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...].
        attention_mask: Mask indexed as [batch, position]; assumed to hold 1
            for real tokens and 0 for padding.

    Returns:
        One hidden-state slice per batch row.
    """
    # If every row attends at the final position, the batch is left-padded
    # (or unpadded), so the final position is the last real token everywhere.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token of each row sits at (mask sum - 1).
    # Cast to long so float masks still produce valid indices.
    sequence_lengths = attention_mask.sum(dim=1).long() - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix a query with its task instruction.

    Returns a two-line string: ``Instruct: <task_description>`` followed by
    ``Query: <query>``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192

# Queries carry the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
# Rows are unit-length after normalization, so the product holds cosine similarities.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select each sequence's hidden state at its last attended position.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...].
        attention_mask: Mask indexed as [batch, position]; assumed to hold 1
            for real tokens and 0 for padding.

    Returns:
        One hidden-state slice per batch row.
    """
    # If every row attends at the final position, the batch is left-padded
    # (or unpadded), so the final position is the last real token everywhere.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token of each row sits at (mask sum - 1).
    # Cast to long so float masks still produce valid indices.
    sequence_lengths = attention_mask.sum(dim=1).long() - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix a query with its task instruction.

    Returns a two-line string: ``Instruct: <task_description>`` followed by
    ``Query: <query>``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192

# Queries carry the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
# Rows are unit-length after normalization, so the product holds cosine similarities.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select each sequence's hidden state at its last attended position.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...].
        attention_mask: Mask indexed as [batch, position]; assumed to hold 1
            for real tokens and 0 for padding.

    Returns:
        One hidden-state slice per batch row.
    """
    # If every row attends at the final position, the batch is left-padded
    # (or unpadded), so the final position is the last real token everywhere.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token of each row sits at (mask sum - 1).
    # Cast to long so float masks still produce valid indices.
    sequence_lengths = attention_mask.sum(dim=1).long() - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix a query with its task instruction.

    Returns a two-line string: ``Instruct: <task_description>`` followed by
    ``Query: <query>``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192

# Queries carry the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
# Rows are unit-length after normalization, so the product holds cosine similarities.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select each sequence's hidden state at its last attended position.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...].
        attention_mask: Mask indexed as [batch, position]; assumed to hold 1
            for real tokens and 0 for padding.

    Returns:
        One hidden-state slice per batch row.
    """
    # If every row attends at the final position, the batch is left-padded
    # (or unpadded), so the final position is the last real token everywhere.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token of each row sits at (mask sum - 1).
    # Cast to long so float masks still produce valid indices.
    sequence_lengths = attention_mask.sum(dim=1).long() - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix a query with its task instruction.

    Returns a two-line string: ``Instruct: <task_description>`` followed by
    ``Query: <query>``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192

# Queries carry the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
# Rows are unit-length after normalization, so the product holds cosine similarities.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select each sequence's hidden state at its last attended position.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...].
        attention_mask: Mask indexed as [batch, position]; assumed to hold 1
            for real tokens and 0 for padding.

    Returns:
        One hidden-state slice per batch row.
    """
    # If every row attends at the final position, the batch is left-padded
    # (or unpadded), so the final position is the last real token everywhere.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token of each row sits at (mask sum - 1).
    # Cast to long so float masks still produce valid indices.
    sequence_lengths = attention_mask.sum(dim=1).long() - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix a query with its task instruction.

    Returns a two-line string: ``Instruct: <task_description>`` followed by
    ``Query: <query>``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192

# Queries carry the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
# Rows are unit-length after normalization, so the product holds cosine similarities.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select each sequence's hidden state at its last attended position.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...].
        attention_mask: Mask indexed as [batch, position]; assumed to hold 1
            for real tokens and 0 for padding.

    Returns:
        One hidden-state slice per batch row.
    """
    # If every row attends at the final position, the batch is left-padded
    # (or unpadded), so the final position is the last real token everywhere.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token of each row sits at (mask sum - 1).
    # Cast to long so float masks still produce valid indices.
    sequence_lengths = attention_mask.sum(dim=1).long() - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix a query with its task instruction.

    Returns a two-line string: ``Instruct: <task_description>`` followed by
    ``Query: <query>``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192

# Queries carry the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
# Rows are unit-length after normalization, so the product holds cosine similarities.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select each sequence's hidden state at its last attended position.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...].
        attention_mask: Mask indexed as [batch, position]; assumed to hold 1
            for real tokens and 0 for padding.

    Returns:
        One hidden-state slice per batch row.
    """
    # If every row attends at the final position, the batch is left-padded
    # (or unpadded), so the final position is the last real token everywhere.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token of each row sits at (mask sum - 1).
    # Cast to long so float masks still produce valid indices.
    sequence_lengths = attention_mask.sum(dim=1).long() - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix a query with its task instruction.

    Returns a two-line string: ``Instruct: <task_description>`` followed by
    ``Query: <query>``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192

# Queries carry the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
# Rows are unit-length after normalization, so the product holds cosine similarities.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select each sequence's hidden state at its last attended position.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...].
        attention_mask: Mask indexed as [batch, position]; assumed to hold 1
            for real tokens and 0 for padding.

    Returns:
        One hidden-state slice per batch row.
    """
    # If every row attends at the final position, the batch is left-padded
    # (or unpadded), so the final position is the last real token everywhere.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token of each row sits at (mask sum - 1).
    # Cast to long so float masks still produce valid indices.
    sequence_lengths = attention_mask.sum(dim=1).long() - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix a query with its task instruction.

    Returns a two-line string: ``Instruct: <task_description>`` followed by
    ``Query: <query>``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192

# Queries carry the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
# Rows are unit-length after normalization, so the product holds cosine similarities.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select each sequence's hidden state at its last attended position.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...].
        attention_mask: Mask indexed as [batch, position]; assumed to hold 1
            for real tokens and 0 for padding.

    Returns:
        One hidden-state slice per batch row.
    """
    # If every row attends at the final position, the batch is left-padded
    # (or unpadded), so the final position is the last real token everywhere.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token of each row sits at (mask sum - 1).
    # Cast to long so float masks still produce valid indices.
    sequence_lengths = attention_mask.sum(dim=1).long() - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix a query with its task instruction.

    Returns a two-line string: ``Instruct: <task_description>`` followed by
    ``Query: <query>``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192

# Queries carry the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
# Rows are unit-length after normalization, so the product holds cosine similarities.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select each sequence's hidden state at its last attended position.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...].
        attention_mask: Mask indexed as [batch, position]; assumed to hold 1
            for real tokens and 0 for padding.

    Returns:
        One hidden-state slice per batch row.
    """
    # If every row attends at the final position, the batch is left-padded
    # (or unpadded), so the final position is the last real token everywhere.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token of each row sits at (mask sum - 1).
    # Cast to long so float masks still produce valid indices.
    sequence_lengths = attention_mask.sum(dim=1).long() - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix a query with its task instruction.

    Returns a two-line string: ``Instruct: <task_description>`` followed by
    ``Query: <query>``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192

# Queries carry the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
# Rows are unit-length after normalization, so the product holds cosine similarities.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select each sequence's hidden state at its last attended position.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...].
        attention_mask: Mask indexed as [batch, position]; assumed to hold 1
            for real tokens and 0 for padding.

    Returns:
        One hidden-state slice per batch row.
    """
    # If every row attends at the final position, the batch is left-padded
    # (or unpadded), so the final position is the last real token everywhere.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token of each row sits at (mask sum - 1).
    # Cast to long so float masks still produce valid indices.
    sequence_lengths = attention_mask.sum(dim=1).long() - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix a query with its task instruction.

    Returns a two-line string: ``Instruct: <task_description>`` followed by
    ``Query: <query>``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()
max_length = 8192

# Queries carry the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)
# Rows are unit-length after normalization, so the product holds cosine similarities.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select each sequence's hidden state at its last attended position.

    Args:
        last_hidden_states: Hidden states indexed as [batch, position, ...].
        attention_mask: Mask indexed as [batch, position]; assumed to hold 1
            for real tokens and 0 for padding.

    Returns:
        One hidden-state slice per batch row.
    """
    # If every row attends at the final position, the batch is left-padded
    # (or unpadded), so the final position is the last real token everywhere.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    # Right padding: the last real token of each row sits at (mask sum - 1).
    # Cast to long so float masks still produce valid indices.
    sequence_lengths = attention_mask.sum(dim=1).long() - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt string: an ``Instruct:`` line followed by a ``Query:`` line."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are unit-length after normalization, so the dot product is cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at each sequence's last attended position.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``; it is summed
            per row to count attended tokens (presumably 1 = token, 0 = pad).

    Returns:
        One vector per batch row.
    """
    # If every row's final mask entry is set, the batch is left-padded (or
    # unpadded) and the last position already holds the last real token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]

    # Right padding: the last real token sits at index (token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt string: an ``Instruct:`` line followed by a ``Query:`` line."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are unit-length after normalization, so the dot product is cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at each sequence's last attended position.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``; it is summed
            per row to count attended tokens (presumably 1 = token, 0 = pad).

    Returns:
        One vector per batch row.
    """
    # If every row's final mask entry is set, the batch is left-padded (or
    # unpadded) and the last position already holds the last real token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]

    # Right padding: the last real token sits at index (token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt string: an ``Instruct:`` line followed by a ``Query:`` line."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are unit-length after normalization, so the dot product is cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at each sequence's last attended position.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``; it is summed
            per row to count attended tokens (presumably 1 = token, 0 = pad).

    Returns:
        One vector per batch row.
    """
    # If every row's final mask entry is set, the batch is left-padded (or
    # unpadded) and the last position already holds the last real token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]

    # Right padding: the last real token sits at index (token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt string: an ``Instruct:`` line followed by a ``Query:`` line."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are unit-length after normalization, so the dot product is cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at each sequence's last attended position.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``; it is summed
            per row to count attended tokens (presumably 1 = token, 0 = pad).

    Returns:
        One vector per batch row.
    """
    # If every row's final mask entry is set, the batch is left-padded (or
    # unpadded) and the last position already holds the last real token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]

    # Right padding: the last real token sits at index (token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt string: an ``Instruct:`` line followed by a ``Query:`` line."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are unit-length after normalization, so the dot product is cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at each sequence's last attended position.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``; it is summed
            per row to count attended tokens (presumably 1 = token, 0 = pad).

    Returns:
        One vector per batch row.
    """
    # If every row's final mask entry is set, the batch is left-padded (or
    # unpadded) and the last position already holds the last real token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]

    # Right padding: the last real token sits at index (token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt string: an ``Instruct:`` line followed by a ``Query:`` line."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are unit-length after normalization, so the dot product is cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at each sequence's last attended position.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``; it is summed
            per row to count attended tokens (presumably 1 = token, 0 = pad).

    Returns:
        One vector per batch row.
    """
    # If every row's final mask entry is set, the batch is left-padded (or
    # unpadded) and the last position already holds the last real token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]

    # Right padding: the last real token sits at index (token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt string: an ``Instruct:`` line followed by a ``Query:`` line."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are unit-length after normalization, so the dot product is cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at each sequence's last attended position.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``; it is summed
            per row to count attended tokens (presumably 1 = token, 0 = pad).

    Returns:
        One vector per batch row.
    """
    # If every row's final mask entry is set, the batch is left-padded (or
    # unpadded) and the last position already holds the last real token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]

    # Right padding: the last real token sits at index (token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt string: an ``Instruct:`` line followed by a ``Query:`` line."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are unit-length after normalization, so the dot product is cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at each sequence's last attended position.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``; it is summed
            per row to count attended tokens (presumably 1 = token, 0 = pad).

    Returns:
        One vector per batch row.
    """
    # If every row's final mask entry is set, the batch is left-padded (or
    # unpadded) and the last position already holds the last real token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]

    # Right padding: the last real token sits at index (token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt string: an ``Instruct:`` line followed by a ``Query:`` line."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are unit-length after normalization, so the dot product is cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at each sequence's last attended position.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``; it is summed
            per row to count attended tokens (presumably 1 = token, 0 = pad).

    Returns:
        One vector per batch row.
    """
    # If every row's final mask entry is set, the batch is left-padded (or
    # unpadded) and the last position already holds the last real token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]

    # Right padding: the last real token sits at index (token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt string: an ``Instruct:`` line followed by a ``Query:`` line."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are unit-length after normalization, so the dot product is cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at each sequence's last attended position.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``; it is summed
            per row to count attended tokens (presumably 1 = token, 0 = pad).

    Returns:
        One vector per batch row.
    """
    # If every row's final mask entry is set, the batch is left-padded (or
    # unpadded) and the last position already holds the last real token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]

    # Right padding: the last real token sits at index (token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt string: an ``Instruct:`` line followed by a ``Query:`` line."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are unit-length after normalization, so the dot product is cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at each sequence's last attended position.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``; it is summed
            per row to count attended tokens (presumably 1 = token, 0 = pad).

    Returns:
        One vector per batch row.
    """
    # If every row's final mask entry is set, the batch is left-padded (or
    # unpadded) and the last position already holds the last real token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]

    # Right padding: the last real token sits at index (token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt string: an ``Instruct:`` line followed by a ``Query:`` line."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are unit-length after normalization, so the dot product is cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at each sequence's last attended position.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``; it is summed
            per row to count attended tokens (presumably 1 = token, 0 = pad).

    Returns:
        One vector per batch row.
    """
    # If every row's final mask entry is set, the batch is left-padded (or
    # unpadded) and the last position already holds the last real token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]

    # Right padding: the last real token sits at index (token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the prompt string: an ``Instruct:`` line followed by a ``Query:`` line."""
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries get the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are unit-length after normalization, so the dot product is cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return the hidden state at each sequence's last attended position.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``; it is summed
            per row to count attended tokens (presumably 1 = token, 0 = pad).

    Returns:
        One vector per batch row.
    """
    # If every row's final mask entry is set, the batch is left-padded (or
    # unpadded) and the last position already holds the last real token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]

    # Right padding: the last real token sits at index (token count - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_index = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_index, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction.

    Returns the string ``"Instruct: <task_description>"``, a newline, then
    ``"Query: <query>"``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# Load tokenizer and model by repository id. trust_remote_code=True permits
# code shipped in the model repository to run locally (see transformers docs).
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are L2-normalized, so each dot product is a cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Page caption that extraction had fused onto the line above:
# Each query must come with a one-sentence instruction that describes the task
# python / transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each sequence, the hidden state at its last attended position.

    Args:
        last_hidden_states: Tensor indexed as ``[batch, position, ...]``.
        attention_mask: 2-D mask indexed as ``[batch, position]``; presumably
            1 marks a real token and 0 marks padding (the usual tokenizer
            convention -- confirm against the tokenizer in use).

    Returns:
        One slice of ``last_hidden_states`` per batch row.
    """
    # When the final mask column sums to the batch size, the batch is treated
    # as left-padded: the final position is the last token of every row.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise index each row at (number of attended positions - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction.

    Returns the string ``"Instruct: <task_description>"``, a newline, then
    ``"Query: <query>"``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# Load tokenizer and model by repository id. trust_remote_code=True permits
# code shipped in the model repository to run locally (see transformers docs).
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are L2-normalized, so each dot product is a cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Page caption that extraction had fused onto the line above:
# Each query must come with a one-sentence instruction that describes the task
# python / transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each sequence, the hidden state at its last attended position.

    Args:
        last_hidden_states: Tensor indexed as ``[batch, position, ...]``.
        attention_mask: 2-D mask indexed as ``[batch, position]``; presumably
            1 marks a real token and 0 marks padding (the usual tokenizer
            convention -- confirm against the tokenizer in use).

    Returns:
        One slice of ``last_hidden_states`` per batch row.
    """
    # When the final mask column sums to the batch size, the batch is treated
    # as left-padded: the final position is the last token of every row.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise index each row at (number of attended positions - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction.

    Returns the string ``"Instruct: <task_description>"``, a newline, then
    ``"Query: <query>"``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# Load tokenizer and model by repository id. trust_remote_code=True permits
# code shipped in the model repository to run locally (see transformers docs).
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are L2-normalized, so each dot product is a cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Page caption that extraction had fused onto the line above:
# Each query must come with a one-sentence instruction that describes the task
# python / transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each sequence, the hidden state at its last attended position.

    Args:
        last_hidden_states: Tensor indexed as ``[batch, position, ...]``.
        attention_mask: 2-D mask indexed as ``[batch, position]``; presumably
            1 marks a real token and 0 marks padding (the usual tokenizer
            convention -- confirm against the tokenizer in use).

    Returns:
        One slice of ``last_hidden_states`` per batch row.
    """
    # When the final mask column sums to the batch size, the batch is treated
    # as left-padded: the final position is the last token of every row.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise index each row at (number of attended positions - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction.

    Returns the string ``"Instruct: <task_description>"``, a newline, then
    ``"Query: <query>"``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# Load tokenizer and model by repository id. trust_remote_code=True permits
# code shipped in the model repository to run locally (see transformers docs).
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are L2-normalized, so each dot product is a cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Page caption that extraction had fused onto the line above:
# Each query must come with a one-sentence instruction that describes the task
# python / transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each sequence, the hidden state at its last attended position.

    Args:
        last_hidden_states: Tensor indexed as ``[batch, position, ...]``.
        attention_mask: 2-D mask indexed as ``[batch, position]``; presumably
            1 marks a real token and 0 marks padding (the usual tokenizer
            convention -- confirm against the tokenizer in use).

    Returns:
        One slice of ``last_hidden_states`` per batch row.
    """
    # When the final mask column sums to the batch size, the batch is treated
    # as left-padded: the final position is the last token of every row.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise index each row at (number of attended positions - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction.

    Returns the string ``"Instruct: <task_description>"``, a newline, then
    ``"Query: <query>"``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# Load tokenizer and model by repository id. trust_remote_code=True permits
# code shipped in the model repository to run locally (see transformers docs).
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are L2-normalized, so each dot product is a cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Page caption that extraction had fused onto the line above:
# Each query must come with a one-sentence instruction that describes the task
# python / transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each sequence, the hidden state at its last attended position.

    Args:
        last_hidden_states: Tensor indexed as ``[batch, position, ...]``.
        attention_mask: 2-D mask indexed as ``[batch, position]``; presumably
            1 marks a real token and 0 marks padding (the usual tokenizer
            convention -- confirm against the tokenizer in use).

    Returns:
        One slice of ``last_hidden_states`` per batch row.
    """
    # When the final mask column sums to the batch size, the batch is treated
    # as left-padded: the final position is the last token of every row.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise index each row at (number of attended positions - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction.

    Returns the string ``"Instruct: <task_description>"``, a newline, then
    ``"Query: <query>"``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# Load tokenizer and model by repository id. trust_remote_code=True permits
# code shipped in the model repository to run locally (see transformers docs).
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are L2-normalized, so each dot product is a cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Page caption that extraction had fused onto the line above:
# Each query must come with a one-sentence instruction that describes the task
# python / transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each sequence, the hidden state at its last attended position.

    Args:
        last_hidden_states: Tensor indexed as ``[batch, position, ...]``.
        attention_mask: 2-D mask indexed as ``[batch, position]``; presumably
            1 marks a real token and 0 marks padding (the usual tokenizer
            convention -- confirm against the tokenizer in use).

    Returns:
        One slice of ``last_hidden_states`` per batch row.
    """
    # When the final mask column sums to the batch size, the batch is treated
    # as left-padded: the final position is the last token of every row.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise index each row at (number of attended positions - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction.

    Returns the string ``"Instruct: <task_description>"``, a newline, then
    ``"Query: <query>"``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# Load tokenizer and model by repository id. trust_remote_code=True permits
# code shipped in the model repository to run locally (see transformers docs).
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are L2-normalized, so each dot product is a cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Page caption that extraction had fused onto the line above:
# Each query must come with a one-sentence instruction that describes the task
# python / transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each sequence, the hidden state at its last attended position.

    Args:
        last_hidden_states: Tensor indexed as ``[batch, position, ...]``.
        attention_mask: 2-D mask indexed as ``[batch, position]``; presumably
            1 marks a real token and 0 marks padding (the usual tokenizer
            convention -- confirm against the tokenizer in use).

    Returns:
        One slice of ``last_hidden_states`` per batch row.
    """
    # When the final mask column sums to the batch size, the batch is treated
    # as left-padded: the final position is the last token of every row.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise index each row at (number of attended positions - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction.

    Returns the string ``"Instruct: <task_description>"``, a newline, then
    ``"Query: <query>"``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# Load tokenizer and model by repository id. trust_remote_code=True permits
# code shipped in the model repository to run locally (see transformers docs).
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are L2-normalized, so each dot product is a cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Page caption that extraction had fused onto the line above:
# Each query must come with a one-sentence instruction that describes the task
# python / transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each sequence, the hidden state at its last attended position.

    Args:
        last_hidden_states: Tensor indexed as ``[batch, position, ...]``.
        attention_mask: 2-D mask indexed as ``[batch, position]``; presumably
            1 marks a real token and 0 marks padding (the usual tokenizer
            convention -- confirm against the tokenizer in use).

    Returns:
        One slice of ``last_hidden_states`` per batch row.
    """
    # When the final mask column sums to the batch size, the batch is treated
    # as left-padded: the final position is the last token of every row.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise index each row at (number of attended positions - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction.

    Returns the string ``"Instruct: <task_description>"``, a newline, then
    ``"Query: <query>"``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# Load tokenizer and model by repository id. trust_remote_code=True permits
# code shipped in the model repository to run locally (see transformers docs).
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are L2-normalized, so each dot product is a cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Page caption that extraction had fused onto the line above:
# Each query must come with a one-sentence instruction that describes the task
# python / transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each sequence, the hidden state at its last attended position.

    Args:
        last_hidden_states: Tensor indexed as ``[batch, position, ...]``.
        attention_mask: 2-D mask indexed as ``[batch, position]``; presumably
            1 marks a real token and 0 marks padding (the usual tokenizer
            convention -- confirm against the tokenizer in use).

    Returns:
        One slice of ``last_hidden_states`` per batch row.
    """
    # When the final mask column sums to the batch size, the batch is treated
    # as left-padded: the final position is the last token of every row.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise index each row at (number of attended positions - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction.

    Returns the string ``"Instruct: <task_description>"``, a newline, then
    ``"Query: <query>"``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# Load tokenizer and model by repository id. trust_remote_code=True permits
# code shipped in the model repository to run locally (see transformers docs).
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are L2-normalized, so each dot product is a cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Page caption that extraction had fused onto the line above:
# Each query must come with a one-sentence instruction that describes the task
# python / transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each sequence, the hidden state at its last attended position.

    Args:
        last_hidden_states: Tensor indexed as ``[batch, position, ...]``.
        attention_mask: 2-D mask indexed as ``[batch, position]``; presumably
            1 marks a real token and 0 marks padding (the usual tokenizer
            convention -- confirm against the tokenizer in use).

    Returns:
        One slice of ``last_hidden_states`` per batch row.
    """
    # When the final mask column sums to the batch size, the batch is treated
    # as left-padded: the final position is the last token of every row.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise index each row at (number of attended positions - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction.

    Returns the string ``"Instruct: <task_description>"``, a newline, then
    ``"Query: <query>"``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# Load tokenizer and model by repository id. trust_remote_code=True permits
# code shipped in the model repository to run locally (see transformers docs).
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are L2-normalized, so each dot product is a cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Page caption that extraction had fused onto the line above:
# Each query must come with a one-sentence instruction that describes the task
# python / transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each sequence, the hidden state at its last attended position.

    Args:
        last_hidden_states: Tensor indexed as ``[batch, position, ...]``.
        attention_mask: 2-D mask indexed as ``[batch, position]``; presumably
            1 marks a real token and 0 marks padding (the usual tokenizer
            convention -- confirm against the tokenizer in use).

    Returns:
        One slice of ``last_hidden_states`` per batch row.
    """
    # When the final mask column sums to the batch size, the batch is treated
    # as left-padded: the final position is the last token of every row.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise index each row at (number of attended positions - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction.

    Returns the string ``"Instruct: <task_description>"``, a newline, then
    ``"Query: <query>"``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# Load tokenizer and model by repository id. trust_remote_code=True permits
# code shipped in the model repository to run locally (see transformers docs).
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are L2-normalized, so each dot product is a cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Page caption that extraction had fused onto the line above:
# Each query must come with a one-sentence instruction that describes the task
# python / transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each sequence, the hidden state at its last attended position.

    Args:
        last_hidden_states: Tensor indexed as ``[batch, position, ...]``.
        attention_mask: 2-D mask indexed as ``[batch, position]``; presumably
            1 marks a real token and 0 marks padding (the usual tokenizer
            convention -- confirm against the tokenizer in use).

    Returns:
        One slice of ``last_hidden_states`` per batch row.
    """
    # When the final mask column sums to the batch size, the batch is treated
    # as left-padded: the final position is the last token of every row.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise index each row at (number of attended positions - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix *query* with its task instruction.

    Returns the string ``"Instruct: <task_description>"``, a newline, then
    ``"Query: <query>"``.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

# Load tokenizer and model by repository id. trust_remote_code=True permits
# code shipped in the model repository to run locally (see transformers docs).
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# Rows are L2-normalized, so each dot product is a cosine similarity.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Page caption that extraction had fused onto the line above:
# Each query must come with a one-sentence instruction that describes the task
# python / transformers
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Return, for each sequence, the hidden state at its last attended position.

    Args:
        last_hidden_states: Tensor indexed as ``[batch, position, ...]``.
        attention_mask: 2-D mask indexed as ``[batch, position]``; presumably
            1 marks a real token and 0 marks padding (the usual tokenizer
            convention -- confirm against the tokenizer in use).

    Returns:
        One slice of ``last_hidden_states`` per batch row.
    """
    # When the final mask column sums to the batch size, the batch is treated
    # as left-padded: the final position is the last token of every row.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    # Otherwise index each row at (number of attended positions - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the instruction-prefixed text for a query.

    Args:
        task_description: Description of the task, placed after ``Instruct: ``.
        query: The query text, placed after ``Query: `` on the following line.

    Returns:
        The two labelled parts joined by a single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only the queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# With unit-length rows, the matrix product holds one cosine similarity per
# (query, document) pair.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pool each sequence down to the hidden state of its last attended token.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``. It is summed to
            count tokens, so attended positions are presumably 1 and padding 0.

    Returns:
        One hidden-state entry per batch row.
    """
    # If the final mask column is set for every row, the batch is left-padded
    # (or unpadded), so the last position is the last real token everywhere.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]

    # Otherwise the last real token of each row sits at
    # (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_indices = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_indices, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the instruction-prefixed text for a query.

    Args:
        task_description: Description of the task, placed after ``Instruct: ``.
        query: The query text, placed after ``Query: `` on the following line.

    Returns:
        The two labelled parts joined by a single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only the queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# With unit-length rows, the matrix product holds one cosine similarity per
# (query, document) pair.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pool each sequence down to the hidden state of its last attended token.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``. It is summed to
            count tokens, so attended positions are presumably 1 and padding 0.

    Returns:
        One hidden-state entry per batch row.
    """
    # If the final mask column is set for every row, the batch is left-padded
    # (or unpadded), so the last position is the last real token everywhere.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]

    # Otherwise the last real token of each row sits at
    # (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_indices = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_indices, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the instruction-prefixed text for a query.

    Args:
        task_description: Description of the task, placed after ``Instruct: ``.
        query: The query text, placed after ``Query: `` on the following line.

    Returns:
        The two labelled parts joined by a single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only the queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# With unit-length rows, the matrix product holds one cosine similarity per
# (query, document) pair.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pool each sequence down to the hidden state of its last attended token.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``. It is summed to
            count tokens, so attended positions are presumably 1 and padding 0.

    Returns:
        One hidden-state entry per batch row.
    """
    # If the final mask column is set for every row, the batch is left-padded
    # (or unpadded), so the last position is the last real token everywhere.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]

    # Otherwise the last real token of each row sits at
    # (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_indices = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_indices, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the instruction-prefixed text for a query.

    Args:
        task_description: Description of the task, placed after ``Instruct: ``.
        query: The query text, placed after ``Query: `` on the following line.

    Returns:
        The two labelled parts joined by a single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only the queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# With unit-length rows, the matrix product holds one cosine similarity per
# (query, document) pair.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Each query must come with a one-sentence instruction that describes the task (python, transformers)
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pool each sequence down to the hidden state of its last attended token.

    Args:
        last_hidden_states: Per-token hidden states, indexed as
            ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``. It is summed to
            count tokens, so attended positions are presumably 1 and padding 0.

    Returns:
        One hidden-state entry per batch row.
    """
    # If the final mask column is set for every row, the batch is left-padded
    # (or unpadded), so the last position is the last real token everywhere.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]

    # Otherwise the last real token of each row sits at
    # (number of attended tokens - 1).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_indices = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_indices, sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the instruction-prefixed text for a query.

    Args:
        task_description: Description of the task, placed after ``Instruct: ``.
        query: The query text, placed after ``Query: `` on the following line.

    Returns:
        The two labelled parts joined by a single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
# Each query must come with a one-sentence instruction that describes the task
task = 'Given a github issue, identify the code that needs to be changed to fix the issue.'

tokenizer = AutoTokenizer.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model = AutoModel.from_pretrained('Salesforce/SweRankEmbed-Large', trust_remote_code=True)
model.eval()

max_length = 8192

# Only the queries receive the instruction prefix; documents are tokenized as-is.
queries = ['Calculate the n-th factorial']
queries_with_prefix = [get_detailed_instruct(task, query) for query in queries]
query_inputs = tokenizer(queries_with_prefix, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

documents = ['def fact(n):\n if n < 0:\n raise ValueError\n return 1 if n == 0 else n * fact(n - 1)']
document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors='pt', max_length=max_length)

# Compute token embeddings
with torch.no_grad():
    query_embeddings = last_token_pool(model(**query_inputs).last_hidden_state, query_inputs["attention_mask"])
    document_embeddings = last_token_pool(model(**document_inputs).last_hidden_state, document_inputs["attention_mask"])

# normalize embeddings
query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
document_embeddings = torch.nn.functional.normalize(document_embeddings, p=2, dim=1)

# With unit-length rows, the matrix product holds one cosine similarity per
# (query, document) pair.
scores = torch.mm(query_embeddings, document_embeddings.transpose(0, 1))

for query, query_scores in zip(queries, scores):
    doc_score_pairs = list(zip(documents, query_scores))
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
    # Output passages & scores
    print("Query:", query)
    for document, score in doc_score_pairs:
        print(score, document)

# Deploy This Model
Production-ready deployment in minutes
Together.ai
Instant API access to this model
Production-ready inference API. Start free, scale to millions.
Try Free API
Replicate
One-click model deployment
Run models in the cloud with simple API. No DevOps required.
Deploy Now
Disclosure: We may earn a commission from these partners. This helps keep LLMYourWay free.