cde-small-v1
83
284
—
by
jxm
Embedding Model
OTHER
New
83 downloads
Early-stage
Edge AI:
Mobile
Laptop
Server
Unknown
Mobile
Laptop
Server
Quick Summary
The cde-small-v1 model has been deprecated.
Code Examples
Loading the model (python, transformers)
import transformers
model = transformers.AutoModel.from_pretrained("jxm/cde-small-v1", trust_remote_code=True)
tokenizer = transformers.AutoTokenizer.from_pretrained("bert-base-uncased")
Loading the model (python, transformers)
import transformers
model = transformers.AutoModel.from_pretrained("jxm/cde-small-v1", trust_remote_code=True)
tokenizer = transformers.AutoTokenizer.from_pretrained("bert-base-uncased")
Loading the model (python, transformers)
import transformers
model = transformers.AutoModel.from_pretrained("jxm/cde-small-v1", trust_remote_code=True)
tokenizer = transformers.AutoTokenizer.from_pretrained("bert-base-uncased")
Loading the model (python, transformers)
import transformers
model = transformers.AutoModel.from_pretrained("jxm/cde-small-v1", trust_remote_code=True)
tokenizer = transformers.AutoTokenizer.from_pretrained("bert-base-uncased")
Loading the model (python, transformers)
import transformers
model = transformers.AutoModel.from_pretrained("jxm/cde-small-v1", trust_remote_code=True)
tokenizer = transformers.AutoTokenizer.from_pretrained("bert-base-uncased")
Loading the model (python, transformers)
import transformers
model = transformers.AutoModel.from_pretrained("jxm/cde-small-v1", trust_remote_code=True)
tokenizer = transformers.AutoTokenizer.from_pretrained("bert-base-uncased")
Loading the model (python, transformers)
import transformers
model = transformers.AutoModel.from_pretrained("jxm/cde-small-v1", trust_remote_code=True)
tokenizer = transformers.AutoTokenizer.from_pretrained("bert-base-uncased")
Loading the model (python, transformers)
import transformers
model = transformers.AutoModel.from_pretrained("jxm/cde-small-v1", trust_remote_code=True)
tokenizer = transformers.AutoTokenizer.from_pretrained("bert-base-uncased")
Loading the model (python, transformers)
import transformers
model = transformers.AutoModel.from_pretrained("jxm/cde-small-v1", trust_remote_code=True)
tokenizer = transformers.AutoTokenizer.from_pretrained("bert-base-uncased")
First stage (python, pytorch)
minicorpus_size = model.config.transductive_corpus_size
minicorpus_docs = [ ... ] # Put some strings here that are representative of your corpus, for example by calling random.sample(corpus, k=minicorpus_size)
assert len(minicorpus_docs) == minicorpus_size # You must use exactly this many documents in the minicorpus. You can oversample if your corpus is smaller.
minicorpus_docs = tokenizer(
[document_prefix + doc for doc in minicorpus_docs],
truncation=True,
padding=True,
max_length=512,
return_tensors="pt"
).to(model.device)
import torch
from tqdm.autonotebook import tqdm
batch_size = 32
dataset_embeddings = []
for i in tqdm(range(0, len(minicorpus_docs["input_ids"]), batch_size)):
    minicorpus_docs_batch = {k: v[i:i+batch_size] for k,v in minicorpus_docs.items()}
    with torch.no_grad():
        dataset_embeddings.append(
            model.first_stage_model(**minicorpus_docs_batch)
        )
dataset_embeddings = torch.cat(dataset_embeddings)
First stage (python, pytorch)
minicorpus_size = model.config.transductive_corpus_size
minicorpus_docs = [ ... ] # Put some strings here that are representative of your corpus, for example by calling random.sample(corpus, k=minicorpus_size)
assert len(minicorpus_docs) == minicorpus_size # You must use exactly this many documents in the minicorpus. You can oversample if your corpus is smaller.
minicorpus_docs = tokenizer(
[document_prefix + doc for doc in minicorpus_docs],
truncation=True,
padding=True,
max_length=512,
return_tensors="pt"
).to(model.device)
import torch
from tqdm.autonotebook import tqdm
batch_size = 32
dataset_embeddings = []
for i in tqdm(range(0, len(minicorpus_docs["input_ids"]), batch_size)):
minicorpus_docs_batch = {k: v[i:i+batch_size] for k,v in minicorpus_docs.items()}
with torch.no_grad():
dataset_embeddings.append(
model.first_stage_model(**minicorpus_docs_batch)
)
dataset_embeddings = torch.cat(dataset_embeddings)
First stage (python, pytorch)
minicorpus_size = model.config.transductive_corpus_size
minicorpus_docs = [ ... ] # Put some strings here that are representative of your corpus, for example by calling random.sample(corpus, k=minicorpus_size)
assert len(minicorpus_docs) == minicorpus_size # You must use exactly this many documents in the minicorpus. You can oversample if your corpus is smaller.
minicorpus_docs = tokenizer(
[document_prefix + doc for doc in minicorpus_docs],
truncation=True,
padding=True,
max_length=512,
return_tensors="pt"
).to(model.device)
import torch
from tqdm.autonotebook import tqdm
batch_size = 32
dataset_embeddings = []
for i in tqdm(range(0, len(minicorpus_docs["input_ids"]), batch_size)):
minicorpus_docs_batch = {k: v[i:i+batch_size] for k,v in minicorpus_docs.items()}
with torch.no_grad():
dataset_embeddings.append(
model.first_stage_model(**minicorpus_docs_batch)
)
dataset_embeddings = torch.cat(dataset_embeddings)
First stage (python, pytorch)
minicorpus_size = model.config.transductive_corpus_size
minicorpus_docs = [ ... ] # Put some strings here that are representative of your corpus, for example by calling random.sample(corpus, k=minicorpus_size)
assert len(minicorpus_docs) == minicorpus_size # You must use exactly this many documents in the minicorpus. You can oversample if your corpus is smaller.
minicorpus_docs = tokenizer(
[document_prefix + doc for doc in minicorpus_docs],
truncation=True,
padding=True,
max_length=512,
return_tensors="pt"
).to(model.device)
import torch
from tqdm.autonotebook import tqdm
batch_size = 32
dataset_embeddings = []
for i in tqdm(range(0, len(minicorpus_docs["input_ids"]), batch_size)):
minicorpus_docs_batch = {k: v[i:i+batch_size] for k,v in minicorpus_docs.items()}
with torch.no_grad():
dataset_embeddings.append(
model.first_stage_model(**minicorpus_docs_batch)
)
dataset_embeddings = torch.cat(dataset_embeddings)
First stage (python, pytorch)
minicorpus_size = model.config.transductive_corpus_size
minicorpus_docs = [ ... ] # Put some strings here that are representative of your corpus, for example by calling random.sample(corpus, k=minicorpus_size)
assert len(minicorpus_docs) == minicorpus_size # You must use exactly this many documents in the minicorpus. You can oversample if your corpus is smaller.
minicorpus_docs = tokenizer(
[document_prefix + doc for doc in minicorpus_docs],
truncation=True,
padding=True,
max_length=512,
return_tensors="pt"
).to(model.device)
import torch
from tqdm.autonotebook import tqdm
batch_size = 32
dataset_embeddings = []
for i in tqdm(range(0, len(minicorpus_docs["input_ids"]), batch_size)):
minicorpus_docs_batch = {k: v[i:i+batch_size] for k,v in minicorpus_docs.items()}
with torch.no_grad():
dataset_embeddings.append(
model.first_stage_model(**minicorpus_docs_batch)
)
dataset_embeddings = torch.cat(dataset_embeddings)
First stage (python, pytorch)
minicorpus_size = model.config.transductive_corpus_size
minicorpus_docs = [ ... ] # Put some strings here that are representative of your corpus, for example by calling random.sample(corpus, k=minicorpus_size)
assert len(minicorpus_docs) == minicorpus_size # You must use exactly this many documents in the minicorpus. You can oversample if your corpus is smaller.
minicorpus_docs = tokenizer(
[document_prefix + doc for doc in minicorpus_docs],
truncation=True,
padding=True,
max_length=512,
return_tensors="pt"
).to(model.device)
import torch
from tqdm.autonotebook import tqdm
batch_size = 32
dataset_embeddings = []
for i in tqdm(range(0, len(minicorpus_docs["input_ids"]), batch_size)):
minicorpus_docs_batch = {k: v[i:i+batch_size] for k,v in minicorpus_docs.items()}
with torch.no_grad():
dataset_embeddings.append(
model.first_stage_model(**minicorpus_docs_batch)
)
dataset_embeddings = torch.cat(dataset_embeddings)
First stage (python, pytorch)
minicorpus_size = model.config.transductive_corpus_size
minicorpus_docs = [ ... ] # Put some strings here that are representative of your corpus, for example by calling random.sample(corpus, k=minicorpus_size)
assert len(minicorpus_docs) == minicorpus_size # You must use exactly this many documents in the minicorpus. You can oversample if your corpus is smaller.
minicorpus_docs = tokenizer(
[document_prefix + doc for doc in minicorpus_docs],
truncation=True,
padding=True,
max_length=512,
return_tensors="pt"
).to(model.device)
import torch
from tqdm.autonotebook import tqdm
batch_size = 32
dataset_embeddings = []
for i in tqdm(range(0, len(minicorpus_docs["input_ids"]), batch_size)):
minicorpus_docs_batch = {k: v[i:i+batch_size] for k,v in minicorpus_docs.items()}
with torch.no_grad():
dataset_embeddings.append(
model.first_stage_model(**minicorpus_docs_batch)
)
dataset_embeddings = torch.cat(dataset_embeddings)
First stage (python, pytorch)
minicorpus_size = model.config.transductive_corpus_size
minicorpus_docs = [ ... ] # Put some strings here that are representative of your corpus, for example by calling random.sample(corpus, k=minicorpus_size)
assert len(minicorpus_docs) == minicorpus_size # You must use exactly this many documents in the minicorpus. You can oversample if your corpus is smaller.
minicorpus_docs = tokenizer(
[document_prefix + doc for doc in minicorpus_docs],
truncation=True,
padding=True,
max_length=512,
return_tensors="pt"
).to(model.device)
import torch
from tqdm.autonotebook import tqdm
batch_size = 32
dataset_embeddings = []
for i in tqdm(range(0, len(minicorpus_docs["input_ids"]), batch_size)):
minicorpus_docs_batch = {k: v[i:i+batch_size] for k,v in minicorpus_docs.items()}
with torch.no_grad():
dataset_embeddings.append(
model.first_stage_model(**minicorpus_docs_batch)
)
dataset_embeddings = torch.cat(dataset_embeddings)
First stage (python, pytorch)
minicorpus_size = model.config.transductive_corpus_size
minicorpus_docs = [ ... ] # Put some strings here that are representative of your corpus, for example by calling random.sample(corpus, k=minicorpus_size)
assert len(minicorpus_docs) == minicorpus_size # You must use exactly this many documents in the minicorpus. You can oversample if your corpus is smaller.
minicorpus_docs = tokenizer(
[document_prefix + doc for doc in minicorpus_docs],
truncation=True,
padding=True,
max_length=512,
return_tensors="pt"
).to(model.device)
import torch
from tqdm.autonotebook import tqdm
batch_size = 32
dataset_embeddings = []
for i in tqdm(range(0, len(minicorpus_docs["input_ids"]), batch_size)):
minicorpus_docs_batch = {k: v[i:i+batch_size] for k,v in minicorpus_docs.items()}
with torch.no_grad():
dataset_embeddings.append(
model.first_stage_model(**minicorpus_docs_batch)
)
dataset_embeddings = torch.cat(dataset_embeddings)
First stage (python)
minicorpus_size = model[0].config.transductive_corpus_size
minicorpus_docs = [ ... ] # Put some strings here that are representative of your corpus, for example by calling random.sample(corpus, k=minicorpus_size)
assert len(minicorpus_docs) == minicorpus_size # You must use exactly this many documents in the minicorpus. You can oversample if your corpus is smaller.
dataset_embeddings = model.encode(
minicorpus_docs,
prompt_name="document",
convert_to_tensor=True
)
First stage (python)
minicorpus_size = model[0].config.transductive_corpus_size
minicorpus_docs = [ ... ] # Put some strings here that are representative of your corpus, for example by calling random.sample(corpus, k=minicorpus_size)
assert len(minicorpus_docs) == minicorpus_size # You must use exactly this many documents in the minicorpus. You can oversample if your corpus is smaller.
dataset_embeddings = model.encode(
minicorpus_docs,
prompt_name="document",
convert_to_tensor=True
)
First stage (python)
minicorpus_size = model[0].config.transductive_corpus_size
minicorpus_docs = [ ... ] # Put some strings here that are representative of your corpus, for example by calling random.sample(corpus, k=minicorpus_size)
assert len(minicorpus_docs) == minicorpus_size # You must use exactly this many documents in the minicorpus. You can oversample if your corpus is smaller.
dataset_embeddings = model.encode(
minicorpus_docs,
prompt_name="document",
convert_to_tensor=True
)
First stage (python)
minicorpus_size = model[0].config.transductive_corpus_size
minicorpus_docs = [ ... ] # Put some strings here that are representative of your corpus, for example by calling random.sample(corpus, k=minicorpus_size)
assert len(minicorpus_docs) == minicorpus_size # You must use exactly this many documents in the minicorpus. You can oversample if your corpus is smaller.
dataset_embeddings = model.encode(
minicorpus_docs,
prompt_name="document",
convert_to_tensor=True
)
First stage (python)
minicorpus_size = model[0].config.transductive_corpus_size
minicorpus_docs = [ ... ] # Put some strings here that are representative of your corpus, for example by calling random.sample(corpus, k=minicorpus_size)
assert len(minicorpus_docs) == minicorpus_size # You must use exactly this many documents in the minicorpus. You can oversample if your corpus is smaller.
dataset_embeddings = model.encode(
minicorpus_docs,
prompt_name="document",
convert_to_tensor=True
)
First stage (python)
minicorpus_size = model[0].config.transductive_corpus_size
minicorpus_docs = [ ... ] # Put some strings here that are representative of your corpus, for example by calling random.sample(corpus, k=minicorpus_size)
assert len(minicorpus_docs) == minicorpus_size # You must use exactly this many documents in the minicorpus. You can oversample if your corpus is smaller.
dataset_embeddings = model.encode(
minicorpus_docs,
prompt_name="document",
convert_to_tensor=True
)
First stage (python)
minicorpus_size = model[0].config.transductive_corpus_size
minicorpus_docs = [ ... ] # Put some strings here that are representative of your corpus, for example by calling random.sample(corpus, k=minicorpus_size)
assert len(minicorpus_docs) == minicorpus_size # You must use exactly this many documents in the minicorpus. You can oversample if your corpus is smaller.
dataset_embeddings = model.encode(
minicorpus_docs,
prompt_name="document",
convert_to_tensor=True
)
First stage (python)
minicorpus_size = model[0].config.transductive_corpus_size
minicorpus_docs = [ ... ] # Put some strings here that are representative of your corpus, for example by calling random.sample(corpus, k=minicorpus_size)
assert len(minicorpus_docs) == minicorpus_size # You must use exactly this many documents in the minicorpus. You can oversample if your corpus is smaller.
dataset_embeddings = model.encode(
minicorpus_docs,
prompt_name="document",
convert_to_tensor=True
)
First stage (python)
minicorpus_size = model[0].config.transductive_corpus_size
minicorpus_docs = [ ... ] # Put some strings here that are representative of your corpus, for example by calling random.sample(corpus, k=minicorpus_size)
assert len(minicorpus_docs) == minicorpus_size # You must use exactly this many documents in the minicorpus. You can oversample if your corpus is smaller.
dataset_embeddings = model.encode(
minicorpus_docs,
prompt_name="document",
convert_to_tensor=True
)
Deploy This Model
Production-ready deployment in minutes
Together.ai
Instant API access to this model
Production-ready inference API. Start free, scale to millions.
Try Free API
Replicate
One-click model deployment
Run models in the cloud with simple API. No DevOps required.
Deploy Now
Disclosure: We may earn a commission from these partners. This helps keep LLMYourWay free.