SeaLLMs-v3-7B-Chat
354
61
7.0B
13 languages
—
by
SeaLLMs
Language Model
OTHER
7B params
New
354 downloads
Early-stage
Edge AI:
Mobile
Laptop
Server
16GB+ RAM
Mobile
Laptop
Server
Quick Summary
License: other license name: seallms license link: https://huggingface.
Device Compatibility
Mobile
4-6GB RAM
Laptop
16GB RAM
Server
GPU
Minimum Recommended
7GB+ RAM
Code Examples
Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")Get started with `Transformers`pythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
prompt = "Hiii How are you?"
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
print(f"Formatted text:\n {text}")
print(f"Model input:\n {model_inputs}")
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True, eos_token_id=tokenizer.eos_token_id)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
print(f"Response:\n {response[0]}")prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})prepare messages to modelpythontransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextStreamer
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
"SeaLLMs/SeaLLMs-v3-7B-Chat", # can change to "SeaLLMs/SeaLLMs-v3-1.5B-Chat" if your resource is limited
torch_dtype=torch.bfloat16,
device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLMs-v3-7B-Chat")
# prepare messages to model
messages = [
{"role": "system", "content": "You are a helpful assistant."},
]
while True:
prompt = input("User:")
messages.append({"role": "user", "content": prompt})
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, streamer=streamer)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
messages.append({"role": "assistant", "content": response})Deploy This Model
Production-ready deployment in minutes
Together.ai
Instant API access to this model
Production-ready inference API. Start free, scale to millions.
Try Free APIReplicate
One-click model deployment
Run models in the cloud with simple API. No DevOps required.
Deploy NowDisclosure: We may earn a commission from these partners. This helps keep LLMYourWay free.