xgen-7b-8k-inst

By: Salesforce
Type: Language Model
Architecture: llama
Parameters: 7B
License: OTHER
Downloads: 859
Status: New, early-stage
Edge AI: Mobile, Laptop, Server (16GB+ RAM)
Quick Summary

Official research release for the family of XGen models (`7B`) by Salesforce AI Research, from the paper "Long Sequence Modeling with XGen: A 7B LLM Trained on 8K Input..."

Device Compatibility

Mobile: 4-6GB RAM
Laptop: 16GB RAM
Server: GPU
Minimum recommended: 7GB+ RAM
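
To fit the lower end of these memory budgets, the model can usually be loaded with 4-bit quantization instead of full bfloat16. A minimal sketch using transformers' BitsAndBytesConfig, assuming a CUDA GPU and the bitsandbytes package are available; the memory figures are rough estimates and vary by hardware and sequence length:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# 4-bit quantized load: cuts the ~14GB bfloat16 footprint to roughly
# 4-5GB (approximate; actual usage depends on hardware and context length).
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(
    "Salesforce/xgen-7b-8k-inst", trust_remote_code=True
)
model = AutoModelForCausalLM.from_pretrained(
    "Salesforce/xgen-7b-8k-inst",
    quantization_config=quant_config,
    device_map="auto",  # place layers on available GPU(s) automatically
)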

Code Examples

Python (transformers):

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the instruction-tuned checkpoint. trust_remote_code=True is needed
# because the XGen tokenizer ships as custom code alongside the model.
tokenizer = AutoTokenizer.from_pretrained("Salesforce/xgen-7b-8k-inst", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("Salesforce/xgen-7b-8k-inst", torch_dtype=torch.bfloat16)

# Chat-style instruction format used by the "inst" variant.
header = (
    "A chat between a curious human and an artificial intelligence assistant. "
    "The assistant gives helpful, detailed, and polite answers to the human's questions.\n\n"
)
article = ""  # insert a document here
prompt = f"### Human: Please summarize the following article.\n\n{article}.\n###"

inputs = tokenizer(header + prompt, return_tensors="pt")
# eos_token_id=50256 is the <|endoftext|> id in the tiktoken-based
# vocabulary XGen uses, so generation stops at end-of-text.
sample = model.generate(**inputs, do_sample=True, max_new_tokens=2048, top_k=100, eos_token_id=50256)
output = tokenizer.decode(sample[0])
print(output.strip().replace("Assistant:", ""))
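
For interactive use, the same call can stream tokens to stdout as they are produced instead of returning only the final string. A minimal sketch using transformers' TextStreamer, reusing the model, tokenizer, header, and prompt from the example above:

from transformers import TextStreamer

# Print tokens as they are generated; skip_prompt=True hides the echoed
# input prompt so only the completion is shown.
streamer = TextStreamer(tokenizer, skip_prompt=True)

inputs = tokenizer(header + prompt, return_tensors="pt")
_ = model.generate(
    **inputs,
    do_sample=True,
    max_new_tokens=512,
    top_k=100,
    eos_token_id=50256,
    streamer=streamer,
)

Note that the bfloat16 load above needs roughly 14GB of memory; on a CUDA machine you would typically also move the model and inputs to the GPU before calling generate, and the 8K context window covers the prompt and the generated continuation combined.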

Deploy This Model

Production-ready deployment in minutes

Together.ai

Instant API access to this model

Fastest API

Production-ready inference API. Start free, scale to millions.

Try Free API

Replicate

One-click model deployment

Easiest Setup

Run models in the cloud with a simple API. No DevOps required.

Deploy Now

Disclosure: We may earn a commission from these partners. This helps keep LLMYourWay free.