granite-guardian-3.1-8b-GGUF
95
8.0B
1 language
BF16
license:apache-2.0
by
Mungert
Language Model
OTHER
8B params
New
95 downloads
Early-stage
Edge AI:
Mobile
Laptop
Server
18GB+ RAM
Mobile
Laptop
Server
Quick Summary
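Risk-detection (guardrail) language model: given a conversation, it answers Yes/No on whether a chosen risk (e.g. harm, groundedness, function-call hallucination) is present.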
Device Compatibility
Mobile
4-6GB RAM
Laptop
16GB RAM
Server
GPU
Minimum Recommended
8GB+ RAM
Code Examples
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Answer text that parse_output maps to the "no risk" label.
safe_token = "No"
# Answer text that parse_output maps to the "risk detected" label.
unsafe_token = "Yes"
# Number of top-scoring candidate tokens inspected per generation step (k for torch.topk).
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a risk label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            only ``output.scores`` and ``output.sequences`` are read.
        input_len: Number of prompt tokens. Everything after this offset in
            the first row of ``output.sequences`` is decoded as the answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded answer matches
        neither token (case-insensitively). ``prob_of_risk`` is a float, or
        ``None`` when ``nlogprobs`` is not positive and no scores were
        inspected (the previous code raised ``AttributeError`` in that case).
    """
    prob_of_risk = None

    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one
        # (presumably the end-of-sequence step -- TODO confirm).
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 is the unsafe-token probability (see get_probabilities).
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Compute normalized safe/unsafe probabilities from top-k candidates.

    Args:
        logprobs: Iterable of per-step results exposing ``.values`` and
            ``.indices`` (as ``torch.topk`` returns). Only the first row of
            each is read.

    Returns:
        A 1-D tensor ``[p_safe, p_unsafe]`` that sums to 1.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a token never appears in the top-k.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            # Scores are exponentiated as if they were log-probabilities.
            if token_text == safe_key:
                safe_mass += math.exp(score)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Load the model and tokenizer identified by model_path.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
# Render the conversation plus the risk config into prompt token ids on the model's device.
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length; parse_output uses it to slice the generated answer off the prompt.
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    # return_dict_in_generate/output_scores expose the .sequences and .scores that parse_output reads.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives December 24, 1922 as the screening date; the context says July 16, 1964.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call below passes "video_id", which is not among the parameters declared in tools.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990

Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Answer text that parse_output maps to the "no risk" label.
safe_token = "No"
# Answer text that parse_output maps to the "risk detected" label.
unsafe_token = "Yes"
# Number of top-scoring candidate tokens inspected per generation step (k for torch.topk).
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a risk label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            only ``output.scores`` and ``output.sequences`` are read.
        input_len: Number of prompt tokens. Everything after this offset in
            the first row of ``output.sequences`` is decoded as the answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded answer matches
        neither token (case-insensitively). ``prob_of_risk`` is a float, or
        ``None`` when ``nlogprobs`` is not positive and no scores were
        inspected (the previous code raised ``AttributeError`` in that case).
    """
    prob_of_risk = None

    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one
        # (presumably the end-of-sequence step -- TODO confirm).
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 is the unsafe-token probability (see get_probabilities).
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Compute normalized safe/unsafe probabilities from top-k candidates.

    Args:
        logprobs: Iterable of per-step results exposing ``.values`` and
            ``.indices`` (as ``torch.topk`` returns). Only the first row of
            each is read.

    Returns:
        A 1-D tensor ``[p_safe, p_unsafe]`` that sums to 1.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a token never appears in the top-k.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            # Scores are exponentiated as if they were log-probabilities.
            if token_text == safe_key:
                safe_mass += math.exp(score)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Load the model and tokenizer identified by model_path.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
# Render the conversation plus the risk config into prompt token ids on the model's device.
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length; parse_output uses it to slice the generated answer off the prompt.
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    # return_dict_in_generate/output_scores expose the .sequences and .scores that parse_output reads.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives December 24, 1922 as the screening date; the context says July 16, 1964.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call below passes "video_id", which is not among the parameters declared in tools.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990

Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Answer text that parse_output maps to the "no risk" label.
safe_token = "No"
# Answer text that parse_output maps to the "risk detected" label.
unsafe_token = "Yes"
# Number of top-scoring candidate tokens inspected per generation step (k for torch.topk).
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a risk label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            only ``output.scores`` and ``output.sequences`` are read.
        input_len: Number of prompt tokens. Everything after this offset in
            the first row of ``output.sequences`` is decoded as the answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded answer matches
        neither token (case-insensitively). ``prob_of_risk`` is a float, or
        ``None`` when ``nlogprobs`` is not positive and no scores were
        inspected (the previous code raised ``AttributeError`` in that case).
    """
    prob_of_risk = None

    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one
        # (presumably the end-of-sequence step -- TODO confirm).
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 is the unsafe-token probability (see get_probabilities).
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Compute normalized safe/unsafe probabilities from top-k candidates.

    Args:
        logprobs: Iterable of per-step results exposing ``.values`` and
            ``.indices`` (as ``torch.topk`` returns). Only the first row of
            each is read.

    Returns:
        A 1-D tensor ``[p_safe, p_unsafe]`` that sums to 1.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a token never appears in the top-k.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            # Scores are exponentiated as if they were log-probabilities.
            if token_text == safe_key:
                safe_mass += math.exp(score)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Load the model and tokenizer identified by model_path.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
# Render the conversation plus the risk config into prompt token ids on the model's device.
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length; parse_output uses it to slice the generated answer off the prompt.
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    # return_dict_in_generate/output_scores expose the .sequences and .scores that parse_output reads.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives December 24, 1922 as the screening date; the context says July 16, 1964.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call below passes "video_id", which is not among the parameters declared in tools.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990

Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Answer text that parse_output maps to the "no risk" label.
safe_token = "No"
# Answer text that parse_output maps to the "risk detected" label.
unsafe_token = "Yes"
# Number of top-scoring candidate tokens inspected per generation step (k for torch.topk).
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a risk label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            only ``output.scores`` and ``output.sequences`` are read.
        input_len: Number of prompt tokens. Everything after this offset in
            the first row of ``output.sequences`` is decoded as the answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded answer matches
        neither token (case-insensitively). ``prob_of_risk`` is a float, or
        ``None`` when ``nlogprobs`` is not positive and no scores were
        inspected (the previous code raised ``AttributeError`` in that case).
    """
    prob_of_risk = None

    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one
        # (presumably the end-of-sequence step -- TODO confirm).
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 is the unsafe-token probability (see get_probabilities).
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Compute normalized safe/unsafe probabilities from top-k candidates.

    Args:
        logprobs: Iterable of per-step results exposing ``.values`` and
            ``.indices`` (as ``torch.topk`` returns). Only the first row of
            each is read.

    Returns:
        A 1-D tensor ``[p_safe, p_unsafe]`` that sums to 1.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a token never appears in the top-k.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            # Scores are exponentiated as if they were log-probabilities.
            if token_text == safe_key:
                safe_mass += math.exp(score)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Load the model and tokenizer identified by model_path.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
# Render the conversation plus the risk config into prompt token ids on the model's device.
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length; parse_output uses it to slice the generated answer off the prompt.
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    # return_dict_in_generate/output_scores expose the .sequences and .scores that parse_output reads.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives December 24, 1922 as the screening date; the context says July 16, 1964.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call below passes "video_id", which is not among the parameters declared in tools.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990

Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Answer text that parse_output maps to the "no risk" label.
safe_token = "No"
# Answer text that parse_output maps to the "risk detected" label.
unsafe_token = "Yes"
# Number of top-scoring candidate tokens inspected per generation step (k for torch.topk).
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a risk label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            only ``output.scores`` and ``output.sequences`` are read.
        input_len: Number of prompt tokens. Everything after this offset in
            the first row of ``output.sequences`` is decoded as the answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded answer matches
        neither token (case-insensitively). ``prob_of_risk`` is a float, or
        ``None`` when ``nlogprobs`` is not positive and no scores were
        inspected (the previous code raised ``AttributeError`` in that case).
    """
    prob_of_risk = None

    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one
        # (presumably the end-of-sequence step -- TODO confirm).
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 is the unsafe-token probability (see get_probabilities).
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Compute normalized safe/unsafe probabilities from top-k candidates.

    Args:
        logprobs: Iterable of per-step results exposing ``.values`` and
            ``.indices`` (as ``torch.topk`` returns). Only the first row of
            each is read.

    Returns:
        A 1-D tensor ``[p_safe, p_unsafe]`` that sums to 1.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a token never appears in the top-k.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            # Scores are exponentiated as if they were log-probabilities.
            if token_text == safe_key:
                safe_mass += math.exp(score)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Load the model and tokenizer identified by model_path.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
# Render the conversation plus the risk config into prompt token ids on the model's device.
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length; parse_output uses it to slice the generated answer off the prompt.
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    # return_dict_in_generate/output_scores expose the .sequences and .scores that parse_output reads.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives December 24, 1922 as the screening date; the context says July 16, 1964.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call below passes "video_id", which is not among the parameters declared in tools.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990

Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Answer text that parse_output maps to the "no risk" label.
safe_token = "No"
# Answer text that parse_output maps to the "risk detected" label.
unsafe_token = "Yes"
# Number of top-scoring candidate tokens inspected per generation step (k for torch.topk).
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a risk label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            only ``output.scores`` and ``output.sequences`` are read.
        input_len: Number of prompt tokens. Everything after this offset in
            the first row of ``output.sequences`` is decoded as the answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded answer matches
        neither token (case-insensitively). ``prob_of_risk`` is a float, or
        ``None`` when ``nlogprobs`` is not positive and no scores were
        inspected (the previous code raised ``AttributeError`` in that case).
    """
    prob_of_risk = None

    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one
        # (presumably the end-of-sequence step -- TODO confirm).
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 is the unsafe-token probability (see get_probabilities).
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Collapse per-step top-k candidates into a [safe, unsafe] distribution.

    Args:
        logprobs: Iterable of per-step results exposing ``values`` and
            ``indices`` (``parse_output`` passes ``torch.topk`` results);
            the first row of each is walked pairwise.

    Returns:
        A two-element tensor: index 0 is the normalised share of candidates
        decoding to ``safe_token``, index 1 the share decoding to
        ``unsafe_token`` (case-insensitive, surrounding whitespace ignored).
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny non-zero starting mass keeps math.log() defined when a token
    # never shows up among the candidates.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_key:
                safe_mass += math.exp(score)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Load the guardian checkpoint in bfloat16; device placement is delegated to
# device_map="auto".
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length; parse_output uses it to cut the prompt off the generated sequence.
input_len = input_ids.shape[1]

model.eval()
# Greedy decoding without gradient tracking; per-step scores are requested
# because parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response dates the first showing to December 24, 1922 (Mekas's birth
# date in the context), whereas the context gives July 16, 1964.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes `video_id`, which is not one of the declared parameters
# (`aweme_id`, `cursor`, `count`) -- presumably the mismatch this example is
# meant to flag.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990

# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Generated answer that parse_output maps to the "no risk" label.
safe_token = "No"
# Generated answer that parse_output maps to the "risk detected" label.
unsafe_token = "Yes"
# Top-k width passed to torch.topk for each generation step in parse_output;
# a probability is only computed when this is greater than zero.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a risk label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens, used to cut the prompt off the
            front of ``output.sequences`` before decoding.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches
        neither. ``prob_of_risk`` is a Python float, or ``None`` when
        ``nlogprobs`` is not positive and no probability was computed
        (previously this case crashed on ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The last step's scores are skipped, as in the original code
        # (presumably the end-of-sequence step -- confirm).
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of get_probabilities' result is the unsafe-token share.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    generated_ids = output.sequences[:, input_len:][0]
    res = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Collapse per-step top-k candidates into a [safe, unsafe] distribution.

    Args:
        logprobs: Iterable of per-step results exposing ``values`` and
            ``indices`` (``parse_output`` passes ``torch.topk`` results);
            the first row of each is walked pairwise.

    Returns:
        A two-element tensor: index 0 is the normalised share of candidates
        decoding to ``safe_token``, index 1 the share decoding to
        ``unsafe_token`` (case-insensitive, surrounding whitespace ignored).
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny non-zero starting mass keeps math.log() defined when a token
    # never shows up among the candidates.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_key:
                safe_mass += math.exp(score)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Load the guardian checkpoint in bfloat16; device placement is delegated to
# device_map="auto".
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length; parse_output uses it to cut the prompt off the generated sequence.
input_len = input_ids.shape[1]

model.eval()
# Greedy decoding without gradient tracking; per-step scores are requested
# because parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response dates the first showing to December 24, 1922 (Mekas's birth
# date in the context), whereas the context gives July 16, 1964.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes `video_id`, which is not one of the declared parameters
# (`aweme_id`, `cursor`, `count`) -- presumably the mismatch this example is
# meant to flag.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990

# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Generated answer that parse_output maps to the "no risk" label.
safe_token = "No"
# Generated answer that parse_output maps to the "risk detected" label.
unsafe_token = "Yes"
# Top-k width passed to torch.topk for each generation step in parse_output;
# a probability is only computed when this is greater than zero.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a risk label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens, used to cut the prompt off the
            front of ``output.sequences`` before decoding.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches
        neither. ``prob_of_risk`` is a Python float, or ``None`` when
        ``nlogprobs`` is not positive and no probability was computed
        (previously this case crashed on ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The last step's scores are skipped, as in the original code
        # (presumably the end-of-sequence step -- confirm).
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of get_probabilities' result is the unsafe-token share.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    generated_ids = output.sequences[:, input_len:][0]
    res = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Collapse per-step top-k candidates into a [safe, unsafe] distribution.

    Args:
        logprobs: Iterable of per-step results exposing ``values`` and
            ``indices`` (``parse_output`` passes ``torch.topk`` results);
            the first row of each is walked pairwise.

    Returns:
        A two-element tensor: index 0 is the normalised share of candidates
        decoding to ``safe_token``, index 1 the share decoding to
        ``unsafe_token`` (case-insensitive, surrounding whitespace ignored).
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny non-zero starting mass keeps math.log() defined when a token
    # never shows up among the candidates.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_key:
                safe_mass += math.exp(score)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Load the guardian checkpoint in bfloat16; device placement is delegated to
# device_map="auto".
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length; parse_output uses it to cut the prompt off the generated sequence.
input_len = input_ids.shape[1]

model.eval()
# Greedy decoding without gradient tracking; per-step scores are requested
# because parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response dates the first showing to December 24, 1922 (Mekas's birth
# date in the context), whereas the context gives July 16, 1964.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes `video_id`, which is not one of the declared parameters
# (`aweme_id`, `cursor`, `count`) -- presumably the mismatch this example is
# meant to flag.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990

# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Generated answer that parse_output maps to the "no risk" label.
safe_token = "No"
# Generated answer that parse_output maps to the "risk detected" label.
unsafe_token = "Yes"
# Top-k width passed to torch.topk for each generation step in parse_output;
# a probability is only computed when this is greater than zero.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a risk label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens, used to cut the prompt off the
            front of ``output.sequences`` before decoding.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches
        neither. ``prob_of_risk`` is a Python float, or ``None`` when
        ``nlogprobs`` is not positive and no probability was computed
        (previously this case crashed on ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The last step's scores are skipped, as in the original code
        # (presumably the end-of-sequence step -- confirm).
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of get_probabilities' result is the unsafe-token share.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    generated_ids = output.sequences[:, input_len:][0]
    res = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Collapse per-step top-k candidates into a [safe, unsafe] distribution.

    Args:
        logprobs: Iterable of per-step results exposing ``values`` and
            ``indices`` (``parse_output`` passes ``torch.topk`` results);
            the first row of each is walked pairwise.

    Returns:
        A two-element tensor: index 0 is the normalised share of candidates
        decoding to ``safe_token``, index 1 the share decoding to
        ``unsafe_token`` (case-insensitive, surrounding whitespace ignored).
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny non-zero starting mass keeps math.log() defined when a token
    # never shows up among the candidates.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_key:
                safe_mass += math.exp(score)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Load the guardian checkpoint in bfloat16; device placement is delegated to
# device_map="auto".
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length; parse_output uses it to cut the prompt off the generated sequence.
input_len = input_ids.shape[1]

model.eval()
# Greedy decoding without gradient tracking; per-step scores are requested
# because parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response dates the first showing to December 24, 1922 (Mekas's birth
# date in the context), whereas the context gives July 16, 1964.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes `video_id`, which is not one of the declared parameters
# (`aweme_id`, `cursor`, `count`) -- presumably the mismatch this example is
# meant to flag.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990

# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Generated answer that parse_output maps to the "no risk" label.
safe_token = "No"
# Generated answer that parse_output maps to the "risk detected" label.
unsafe_token = "Yes"
# Top-k width passed to torch.topk for each generation step in parse_output;
# a probability is only computed when this is greater than zero.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a risk label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens, used to cut the prompt off the
            front of ``output.sequences`` before decoding.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches
        neither. ``prob_of_risk`` is a Python float, or ``None`` when
        ``nlogprobs`` is not positive and no probability was computed
        (previously this case crashed on ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The last step's scores are skipped, as in the original code
        # (presumably the end-of-sequence step -- confirm).
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of get_probabilities' result is the unsafe-token share.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    generated_ids = output.sequences[:, input_len:][0]
    res = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Collapse per-step top-k candidates into a [safe, unsafe] distribution.

    Args:
        logprobs: Iterable of per-step results exposing ``values`` and
            ``indices`` (``parse_output`` passes ``torch.topk`` results);
            the first row of each is walked pairwise.

    Returns:
        A two-element tensor: index 0 is the normalised share of candidates
        decoding to ``safe_token``, index 1 the share decoding to
        ``unsafe_token`` (case-insensitive, surrounding whitespace ignored).
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny non-zero starting mass keeps math.log() defined when a token
    # never shows up among the candidates.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_key:
                safe_mass += math.exp(score)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Load the guardian checkpoint in bfloat16; device placement is delegated to
# device_map="auto".
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length; parse_output uses it to cut the prompt off the generated sequence.
input_len = input_ids.shape[1]

model.eval()
# Greedy decoding without gradient tracking; per-step scores are requested
# because parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response dates the first showing to December 24, 1922 (Mekas's birth
# date in the context), whereas the context gives July 16, 1964.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes `video_id`, which is not one of the declared parameters
# (`aweme_id`, `cursor`, `count`) -- presumably the mismatch this example is
# meant to flag.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990

# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Generated answer that parse_output maps to the "no risk" label.
safe_token = "No"
# Generated answer that parse_output maps to the "risk detected" label.
unsafe_token = "Yes"
# Top-k width passed to torch.topk for each generation step in parse_output;
# a probability is only computed when this is greater than zero.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a risk label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens, used to cut the prompt off the
            front of ``output.sequences`` before decoding.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches
        neither. ``prob_of_risk`` is a Python float, or ``None`` when
        ``nlogprobs`` is not positive and no probability was computed
        (previously this case crashed on ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The last step's scores are skipped, as in the original code
        # (presumably the end-of-sequence step -- confirm).
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of get_probabilities' result is the unsafe-token share.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    generated_ids = output.sequences[:, input_len:][0]
    res = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Return the normalised ``[safe, unsafe]`` probability pair.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generation
            step. Only row 0 of each ``values`` / ``indices`` tensor is read.

    Returns:
        A 1-D tensor of two entries produced by a softmax: index 0 for
        ``safe_token``, index 1 for ``unsafe_token``.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor so math.log below never receives zero when a token is absent.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            # Two independent checks (not elif), as each token is compared
            # against both verdict strings.
            if candidate == safe_key:
                safe_mass += math.exp(score)
            if candidate == unsafe_key:
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Load the checkpoint named by model_path together with its tokenizer.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Usage 1: Example for specific risk in assistant message
# (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# `harm` is the default risk definition; it is what gets applied when no
# config is specified.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995

# Usage 2: Example for Hallucination risks in RAG
# (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997

# Usage 3: Example for hallucination risk in function call
# (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes `video_id` while the tool declares `aweme_id`;
# presumably this mismatch is the hallucination the example is meant to flag,
# so it should not be "corrected".
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# --- Quickstart Example (python, transformers) ---
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings: parse_output compares the decoded generation against these
# case-insensitively, and get_probabilities matches candidate tokens to them.
safe_token = "No"
unsafe_token = "Yes"
# Number of top-scoring candidates kept per generation step (the `k` passed to
# torch.topk in parse_output).
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``; only
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens. Everything after this offset in
            the first row of ``output.sequences`` is decoded as the answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded answer matches
        neither. ``prob_of_risk`` is a Python float, or ``None`` when
        ``nlogprobs`` is not positive and no probability was computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of the returned pair is the unsafe-token probability.
        # Convert to float here so the return below never calls .item() on None.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip().lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Return the normalised ``[safe, unsafe]`` probability pair.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generation
            step. Only row 0 of each ``values`` / ``indices`` tensor is read.

    Returns:
        A 1-D tensor of two entries produced by a softmax: index 0 for
        ``safe_token``, index 1 for ``unsafe_token``.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor so math.log below never receives zero when a token is absent.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            # Two independent checks (not elif), as each token is compared
            # against both verdict strings.
            if candidate == safe_key:
                safe_mass += math.exp(score)
            if candidate == unsafe_key:
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Load the checkpoint named by model_path together with its tokenizer.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Usage 1: Example for specific risk in assistant message
# (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# `harm` is the default risk definition; it is what gets applied when no
# config is specified.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995

# Usage 2: Example for Hallucination risks in RAG
# (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997

# Usage 3: Example for hallucination risk in function call
# (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes `video_id` while the tool declares `aweme_id`;
# presumably this mismatch is the hallucination the example is meant to flag,
# so it should not be "corrected".
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# --- Quickstart Example (python, transformers) ---
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings: parse_output compares the decoded generation against these
# case-insensitively, and get_probabilities matches candidate tokens to them.
safe_token = "No"
unsafe_token = "Yes"
# Number of top-scoring candidates kept per generation step (the `k` passed to
# torch.topk in parse_output).
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``; only
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens. Everything after this offset in
            the first row of ``output.sequences`` is decoded as the answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded answer matches
        neither. ``prob_of_risk`` is a Python float, or ``None`` when
        ``nlogprobs`` is not positive and no probability was computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of the returned pair is the unsafe-token probability.
        # Convert to float here so the return below never calls .item() on None.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip().lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Return the normalised ``[safe, unsafe]`` probability pair.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generation
            step. Only row 0 of each ``values`` / ``indices`` tensor is read.

    Returns:
        A 1-D tensor of two entries produced by a softmax: index 0 for
        ``safe_token``, index 1 for ``unsafe_token``.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor so math.log below never receives zero when a token is absent.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            # Two independent checks (not elif), as each token is compared
            # against both verdict strings.
            if candidate == safe_key:
                safe_mass += math.exp(score)
            if candidate == unsafe_key:
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Load the checkpoint named by model_path together with its tokenizer.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Usage 1: Example for specific risk in assistant message
# (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# `harm` is the default risk definition; it is what gets applied when no
# config is specified.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995

# Usage 2: Example for Hallucination risks in RAG
# (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997

# Usage 3: Example for hallucination risk in function call
# (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes `video_id` while the tool declares `aweme_id`;
# presumably this mismatch is the hallucination the example is meant to flag,
# so it should not be "corrected".
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# --- Quickstart Example (python, transformers) ---
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings: parse_output compares the decoded generation against these
# case-insensitively, and get_probabilities matches candidate tokens to them.
safe_token = "No"
unsafe_token = "Yes"
# Number of top-scoring candidates kept per generation step (the `k` passed to
# torch.topk in parse_output).
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``; only
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens. Everything after this offset in
            the first row of ``output.sequences`` is decoded as the answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded answer matches
        neither. ``prob_of_risk`` is a Python float, or ``None`` when
        ``nlogprobs`` is not positive and no probability was computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of the returned pair is the unsafe-token probability.
        # Convert to float here so the return below never calls .item() on None.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip().lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Return the normalised ``[safe, unsafe]`` probability pair.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generation
            step. Only row 0 of each ``values`` / ``indices`` tensor is read.

    Returns:
        A 1-D tensor of two entries produced by a softmax: index 0 for
        ``safe_token``, index 1 for ``unsafe_token``.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor so math.log below never receives zero when a token is absent.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            # Two independent checks (not elif), as each token is compared
            # against both verdict strings.
            if candidate == safe_key:
                safe_mass += math.exp(score)
            if candidate == unsafe_key:
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Load the checkpoint named by model_path together with its tokenizer.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Usage 1: Example for specific risk in assistant message
# (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# `harm` is the default risk definition; it is what gets applied when no
# config is specified.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995

# Usage 2: Example for Hallucination risks in RAG
# (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997

# Usage 3: Example for hallucination risk in function call
# (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes `video_id` while the tool declares `aweme_id`;
# presumably this mismatch is the hallucination the example is meant to flag,
# so it should not be "corrected".
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# --- Quickstart Example (python, transformers) ---
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings: parse_output compares the decoded generation against these
# case-insensitively, and get_probabilities matches candidate tokens to them.
safe_token = "No"
unsafe_token = "Yes"
# Number of top-scoring candidates kept per generation step (the `k` passed to
# torch.topk in parse_output).
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``; only
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens. Everything after this offset in
            the first row of ``output.sequences`` is decoded as the answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded answer matches
        neither. ``prob_of_risk`` is a Python float, or ``None`` when
        ``nlogprobs`` is not positive and no probability was computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of the returned pair is the unsafe-token probability.
        # Convert to float here so the return below never calls .item() on None.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip().lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Return the normalised ``[safe, unsafe]`` probability pair.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generation
            step. Only row 0 of each ``values`` / ``indices`` tensor is read.

    Returns:
        A 1-D tensor of two entries produced by a softmax: index 0 for
        ``safe_token``, index 1 for ``unsafe_token``.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor so math.log below never receives zero when a token is absent.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            # Two independent checks (not elif), as each token is compared
            # against both verdict strings.
            if candidate == safe_key:
                safe_mass += math.exp(score)
            if candidate == unsafe_key:
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Load the checkpoint named by model_path together with its tokenizer.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Usage 1: Example for specific risk in assistant message
# (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# `harm` is the default risk definition; it is what gets applied when no
# config is specified.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995

# Usage 2: Example for Hallucination risks in RAG
# (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997

# Usage 3: Example for hallucination risk in function call
# (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes `video_id` while the tool declares `aweme_id`;
# presumably this mismatch is the hallucination the example is meant to flag,
# so it should not be "corrected".
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# --- Quickstart Example (python, transformers) ---
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings: parse_output compares the decoded generation against these
# case-insensitively, and get_probabilities matches candidate tokens to them.
safe_token = "No"
unsafe_token = "Yes"
# Number of top-scoring candidates kept per generation step (the `k` passed to
# torch.topk in parse_output).
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``; only
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens. Everything after this offset in
            the first row of ``output.sequences`` is decoded as the answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded answer matches
        neither. ``prob_of_risk`` is a Python float, or ``None`` when
        ``nlogprobs`` is not positive and no probability was computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of the returned pair is the unsafe-token probability.
        # Convert to float here so the return below never calls .item() on None.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip().lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Return a 2-element tensor `[p_safe, p_unsafe]` from top-k step results.

    Args:
        logprobs: Iterable of `torch.topk` results (objects exposing `.values`
            and `.indices`); only the first batch row of each is read. The
            values are exponentiated as if they were log-probabilities.

    Returns:
        `torch.softmax` over the logs of the accumulated safe / unsafe masses.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()

    # Tiny non-zero floor so math.log below never receives 0.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_key:
                safe_mass += math.exp(score)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
model_path = "ibm-granite/granite-guardian-3.1-8b"

model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Inference only. One call is enough; the original repeated it before every generate().
model.eval()


def _assess_risk(messages, guardian_config):
    """Run the guardian on `messages` and return `parse_output`'s (label, probability).

    Args:
        messages: Chat messages handed to `tokenizer.apply_chat_template`.
        guardian_config: Dict forwarded to the chat template (e.g. `{"risk_name": "harm"}`).
    """
    input_ids = tokenizer.apply_chat_template(
        messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    input_len = input_ids.shape[1]

    with torch.no_grad():
        output = model.generate(
            input_ids,
            do_sample=False,
            max_new_tokens=20,
            return_dict_in_generate=True,
            output_scores=True,
        )
    return parse_output(output, input_len)


# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
# NOTE(review): the schema names the ID parameter `aweme_id` while the call below
# passes `video_id`; presumably this mismatch is the deliberate error to be flagged.
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Generated answer that parse_output reports as the "no risk" label.
safe_token = "No"
# Generated answer that parse_output reports as the "risk" label.
unsafe_token = "Yes"
# k passed to torch.topk for each generation step's scores; parse_output only
# computes a probability when this is > 0.
nlogprobs = 20
def parse_output(output, input_len):
    """Convert a `generate` result into a (label, probability-of-risk) pair.

    Args:
        output: Result of `model.generate(..., return_dict_in_generate=True,
            output_scores=True)`; its `.scores` and `.sequences` are read.
        input_len: Number of prompt tokens; everything after that position in
            `output.sequences` is decoded as the model's answer.

    Returns:
        Tuple `(label, prob_of_risk)`. `label` is `unsafe_token`,
        `safe_token`, or "Failed" when the decoded answer matches neither.
        `prob_of_risk` is a Python float, or None when `nlogprobs <= 0` and
        no probability was computed.
    """
    label, prob_of_risk = None, None

    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one
        # (presumably the end-of-sequence step -- TODO confirm).
        list_index_logprobs_i = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        prob = get_probabilities(list_index_logprobs_i)
        prob_of_risk = prob[1]  # index 1 is the unsafe ("Yes") entry

    # Only the first sequence of the batch is decoded.
    res = tokenizer.decode(output.sequences[:, input_len:][0], skip_special_tokens=True).strip()
    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    # The original called .item() unconditionally, which raised AttributeError
    # on None whenever the probability computation was skipped.
    return label, (prob_of_risk.item() if prob_of_risk is not None else None)
def get_probabilities(logprobs):
    """Return a 2-element tensor `[p_safe, p_unsafe]` from top-k step results.

    Args:
        logprobs: Iterable of `torch.topk` results (objects exposing `.values`
            and `.indices`); only the first batch row of each is read. The
            values are exponentiated as if they were log-probabilities.

    Returns:
        `torch.softmax` over the logs of the accumulated safe / unsafe masses.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()

    # Tiny non-zero floor so math.log below never receives 0.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_key:
                safe_mass += math.exp(score)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
model_path = "ibm-granite/granite-guardian-3.1-8b"

model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Inference only. One call is enough; the original repeated it before every generate().
model.eval()


def _assess_risk(messages, guardian_config):
    """Run the guardian on `messages` and return `parse_output`'s (label, probability).

    Args:
        messages: Chat messages handed to `tokenizer.apply_chat_template`.
        guardian_config: Dict forwarded to the chat template (e.g. `{"risk_name": "harm"}`).
    """
    input_ids = tokenizer.apply_chat_template(
        messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    input_len = input_ids.shape[1]

    with torch.no_grad():
        output = model.generate(
            input_ids,
            do_sample=False,
            max_new_tokens=20,
            return_dict_in_generate=True,
            output_scores=True,
        )
    return parse_output(output, input_len)


# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
# NOTE(review): the schema names the ID parameter `aweme_id` while the call below
# passes `video_id`; presumably this mismatch is the deliberate error to be flagged.
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Generated answer that parse_output reports as the "no risk" label.
safe_token = "No"
# Generated answer that parse_output reports as the "risk" label.
unsafe_token = "Yes"
# k passed to torch.topk for each generation step's scores; parse_output only
# computes a probability when this is > 0.
nlogprobs = 20
def parse_output(output, input_len):
    """Convert a `generate` result into a (label, probability-of-risk) pair.

    Args:
        output: Result of `model.generate(..., return_dict_in_generate=True,
            output_scores=True)`; its `.scores` and `.sequences` are read.
        input_len: Number of prompt tokens; everything after that position in
            `output.sequences` is decoded as the model's answer.

    Returns:
        Tuple `(label, prob_of_risk)`. `label` is `unsafe_token`,
        `safe_token`, or "Failed" when the decoded answer matches neither.
        `prob_of_risk` is a Python float, or None when `nlogprobs <= 0` and
        no probability was computed.
    """
    label, prob_of_risk = None, None

    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one
        # (presumably the end-of-sequence step -- TODO confirm).
        list_index_logprobs_i = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        prob = get_probabilities(list_index_logprobs_i)
        prob_of_risk = prob[1]  # index 1 is the unsafe ("Yes") entry

    # Only the first sequence of the batch is decoded.
    res = tokenizer.decode(output.sequences[:, input_len:][0], skip_special_tokens=True).strip()
    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    # The original called .item() unconditionally, which raised AttributeError
    # on None whenever the probability computation was skipped.
    return label, (prob_of_risk.item() if prob_of_risk is not None else None)
def get_probabilities(logprobs):
    """Return a 2-element tensor `[p_safe, p_unsafe]` from top-k step results.

    Args:
        logprobs: Iterable of `torch.topk` results (objects exposing `.values`
            and `.indices`); only the first batch row of each is read. The
            values are exponentiated as if they were log-probabilities.

    Returns:
        `torch.softmax` over the logs of the accumulated safe / unsafe masses.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()

    # Tiny non-zero floor so math.log below never receives 0.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_key:
                safe_mass += math.exp(score)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
model_path = "ibm-granite/granite-guardian-3.1-8b"

model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Inference only. One call is enough; the original repeated it before every generate().
model.eval()


def _assess_risk(messages, guardian_config):
    """Run the guardian on `messages` and return `parse_output`'s (label, probability).

    Args:
        messages: Chat messages handed to `tokenizer.apply_chat_template`.
        guardian_config: Dict forwarded to the chat template (e.g. `{"risk_name": "harm"}`).
    """
    input_ids = tokenizer.apply_chat_template(
        messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    input_len = input_ids.shape[1]

    with torch.no_grad():
        output = model.generate(
            input_ids,
            do_sample=False,
            max_new_tokens=20,
            return_dict_in_generate=True,
            output_scores=True,
        )
    return parse_output(output, input_len)


# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
# NOTE(review): the schema names the ID parameter `aweme_id` while the call below
# passes `video_id`; presumably this mismatch is the deliberate error to be flagged.
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Generated answer that parse_output reports as the "no risk" label.
safe_token = "No"
# Generated answer that parse_output reports as the "risk" label.
unsafe_token = "Yes"
# k passed to torch.topk for each generation step's scores; parse_output only
# computes a probability when this is > 0.
nlogprobs = 20
def parse_output(output, input_len):
    """Convert a `generate` result into a (label, probability-of-risk) pair.

    Args:
        output: Result of `model.generate(..., return_dict_in_generate=True,
            output_scores=True)`; its `.scores` and `.sequences` are read.
        input_len: Number of prompt tokens; everything after that position in
            `output.sequences` is decoded as the model's answer.

    Returns:
        Tuple `(label, prob_of_risk)`. `label` is `unsafe_token`,
        `safe_token`, or "Failed" when the decoded answer matches neither.
        `prob_of_risk` is a Python float, or None when `nlogprobs <= 0` and
        no probability was computed.
    """
    label, prob_of_risk = None, None

    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one
        # (presumably the end-of-sequence step -- TODO confirm).
        list_index_logprobs_i = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        prob = get_probabilities(list_index_logprobs_i)
        prob_of_risk = prob[1]  # index 1 is the unsafe ("Yes") entry

    # Only the first sequence of the batch is decoded.
    res = tokenizer.decode(output.sequences[:, input_len:][0], skip_special_tokens=True).strip()
    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    # The original called .item() unconditionally, which raised AttributeError
    # on None whenever the probability computation was skipped.
    return label, (prob_of_risk.item() if prob_of_risk is not None else None)
def get_probabilities(logprobs):
    """Return a 2-element tensor `[p_safe, p_unsafe]` from top-k step results.

    Args:
        logprobs: Iterable of `torch.topk` results (objects exposing `.values`
            and `.indices`); only the first batch row of each is read. The
            values are exponentiated as if they were log-probabilities.

    Returns:
        `torch.softmax` over the logs of the accumulated safe / unsafe masses.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()

    # Tiny non-zero floor so math.log below never receives 0.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_key:
                safe_mass += math.exp(score)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
model_path = "ibm-granite/granite-guardian-3.1-8b"

model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Inference only. One call is enough; the original repeated it before every generate().
model.eval()


def _assess_risk(messages, guardian_config):
    """Run the guardian on `messages` and return `parse_output`'s (label, probability).

    Args:
        messages: Chat messages handed to `tokenizer.apply_chat_template`.
        guardian_config: Dict forwarded to the chat template (e.g. `{"risk_name": "harm"}`).
    """
    input_ids = tokenizer.apply_chat_template(
        messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    input_len = input_ids.shape[1]

    with torch.no_grad():
        output = model.generate(
            input_ids,
            do_sample=False,
            max_new_tokens=20,
            return_dict_in_generate=True,
            output_scores=True,
        )
    return parse_output(output, input_len)


# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
# NOTE(review): the schema names the ID parameter `aweme_id` while the call below
# passes `video_id`; presumably this mismatch is the deliberate error to be flagged.
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Generated answer that parse_output reports as the "no risk" label.
safe_token = "No"
# Generated answer that parse_output reports as the "risk" label.
unsafe_token = "Yes"
# k passed to torch.topk for each generation step's scores; parse_output only
# computes a probability when this is > 0.
nlogprobs = 20
def parse_output(output, input_len):
    """Convert a `generate` result into a (label, probability-of-risk) pair.

    Args:
        output: Result of `model.generate(..., return_dict_in_generate=True,
            output_scores=True)`; its `.scores` and `.sequences` are read.
        input_len: Number of prompt tokens; everything after that position in
            `output.sequences` is decoded as the model's answer.

    Returns:
        Tuple `(label, prob_of_risk)`. `label` is `unsafe_token`,
        `safe_token`, or "Failed" when the decoded answer matches neither.
        `prob_of_risk` is a Python float, or None when `nlogprobs <= 0` and
        no probability was computed.
    """
    label, prob_of_risk = None, None

    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one
        # (presumably the end-of-sequence step -- TODO confirm).
        list_index_logprobs_i = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        prob = get_probabilities(list_index_logprobs_i)
        prob_of_risk = prob[1]  # index 1 is the unsafe ("Yes") entry

    # Only the first sequence of the batch is decoded.
    res = tokenizer.decode(output.sequences[:, input_len:][0], skip_special_tokens=True).strip()
    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    # The original called .item() unconditionally, which raised AttributeError
    # on None whenever the probability computation was skipped.
    return label, (prob_of_risk.item() if prob_of_risk is not None else None)
def get_probabilities(logprobs):
    """Return a 2-element tensor `[p_safe, p_unsafe]` from top-k step results.

    Args:
        logprobs: Iterable of `torch.topk` results (objects exposing `.values`
            and `.indices`); only the first batch row of each is read. The
            values are exponentiated as if they were log-probabilities.

    Returns:
        `torch.softmax` over the logs of the accumulated safe / unsafe masses.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()

    # Tiny non-zero floor so math.log below never receives 0.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_key:
                safe_mass += math.exp(score)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
model_path = "ibm-granite/granite-guardian-3.1-8b"

model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Inference only. One call is enough; the original repeated it before every generate().
model.eval()


def _assess_risk(messages, guardian_config):
    """Run the guardian on `messages` and return `parse_output`'s (label, probability).

    Args:
        messages: Chat messages handed to `tokenizer.apply_chat_template`.
        guardian_config: Dict forwarded to the chat template (e.g. `{"risk_name": "harm"}`).
    """
    input_ids = tokenizer.apply_chat_template(
        messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    input_len = input_ids.shape[1]

    with torch.no_grad():
        output = model.generate(
            input_ids,
            do_sample=False,
            max_new_tokens=20,
            return_dict_in_generate=True,
            output_scores=True,
        )
    return parse_output(output, input_len)


# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
# NOTE(review): the schema names the ID parameter `aweme_id` while the call below
# passes `video_id`; presumably this mismatch is the deliberate error to be flagged.
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Generated answer that parse_output reports as the "no risk" label.
safe_token = "No"
# Generated answer that parse_output reports as the "risk" label.
unsafe_token = "Yes"
# k passed to torch.topk for each generation step's scores; parse_output only
# computes a probability when this is > 0.
nlogprobs = 20
def parse_output(output, input_len):
    """Convert a `generate` result into a (label, probability-of-risk) pair.

    Args:
        output: Result of `model.generate(..., return_dict_in_generate=True,
            output_scores=True)`; its `.scores` and `.sequences` are read.
        input_len: Number of prompt tokens; everything after that position in
            `output.sequences` is decoded as the model's answer.

    Returns:
        Tuple `(label, prob_of_risk)`. `label` is `unsafe_token`,
        `safe_token`, or "Failed" when the decoded answer matches neither.
        `prob_of_risk` is a Python float, or None when `nlogprobs <= 0` and
        no probability was computed.
    """
    label, prob_of_risk = None, None

    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one
        # (presumably the end-of-sequence step -- TODO confirm).
        list_index_logprobs_i = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        prob = get_probabilities(list_index_logprobs_i)
        prob_of_risk = prob[1]  # index 1 is the unsafe ("Yes") entry

    # Only the first sequence of the batch is decoded.
    res = tokenizer.decode(output.sequences[:, input_len:][0], skip_special_tokens=True).strip()
    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    # The original called .item() unconditionally, which raised AttributeError
    # on None whenever the probability computation was skipped.
    return label, (prob_of_risk.item() if prob_of_risk is not None else None)
def get_probabilities(logprobs):
    """Aggregate top-k token scores into safe/unsafe probabilities.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generated
            position; ``values`` and ``indices`` are read from row 0 only.

    Returns:
        A 2-element tensor ``[p_safe, p_unsafe]`` that sums to 1.
    """
    safe_target = safe_token.lower()
    unsafe_target = unsafe_token.lower()
    # Start from a tiny positive mass so math.log never receives zero.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50
    for gen_token_i in logprobs:
        values = gen_token_i.values.tolist()[0]
        indices = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(values, indices):
            decoded_token = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if decoded_token == safe_target:
                safe_token_prob += math.exp(logprob)
            if decoded_token == unsafe_target:
                unsafe_token_prob += math.exp(logprob)
    # Softmax over the logs is the two masses renormalised to sum to one.
    return torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]),
        dim=0,
    )
# Load the guardian checkpoint and its tokenizer once; the usages below reuse both.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length, so parse_output can cut the prompt off the generated sequence.
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    # Greedy decoding; per-step scores are kept so parse_output can derive a probability.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes `video_id`, which is not among the declared
# parameters (`aweme_id`, `cursor`, `count`) — presumably the mismatch this
# example is meant to have flagged; confirm before "fixing" it.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings that parse_output compares the decoded generation against.
safe_token = "No"
unsafe_token = "Yes"
# `k` passed to torch.topk for every generated position in parse_output.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            only ``scores`` and ``sequences`` are read.
        input_len: Number of prompt tokens, used to cut the prompt off
            ``output.sequences`` before decoding.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches
        neither. ``prob_of_risk`` is a float, or ``None`` when
        ``nlogprobs <= 0`` and no probability was computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generated position except the last one.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of get_probabilities' result is the unsafe-token entry.
        # Converting here keeps the None case from hitting ``.item()``.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    generated = output.sequences[:, input_len:][0]
    res = tokenizer.decode(generated, skip_special_tokens=True).strip().lower()
    if res == unsafe_token.lower():
        label = unsafe_token
    elif res == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Aggregate top-k token scores into safe/unsafe probabilities.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generated
            position; ``values`` and ``indices`` are read from row 0 only.

    Returns:
        A 2-element tensor ``[p_safe, p_unsafe]`` that sums to 1.
    """
    safe_target = safe_token.lower()
    unsafe_target = unsafe_token.lower()
    # Start from a tiny positive mass so math.log never receives zero.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50
    for gen_token_i in logprobs:
        values = gen_token_i.values.tolist()[0]
        indices = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(values, indices):
            decoded_token = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if decoded_token == safe_target:
                safe_token_prob += math.exp(logprob)
            if decoded_token == unsafe_target:
                unsafe_token_prob += math.exp(logprob)
    # Softmax over the logs is the two masses renormalised to sum to one.
    return torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]),
        dim=0,
    )
# Load the guardian checkpoint and its tokenizer once; the usages below reuse both.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length, so parse_output can cut the prompt off the generated sequence.
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    # Greedy decoding; per-step scores are kept so parse_output can derive a probability.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes `video_id`, which is not among the declared
# parameters (`aweme_id`, `cursor`, `count`) — presumably the mismatch this
# example is meant to have flagged; confirm before "fixing" it.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings that parse_output compares the decoded generation against.
safe_token = "No"
unsafe_token = "Yes"
# `k` passed to torch.topk for every generated position in parse_output.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            only ``scores`` and ``sequences`` are read.
        input_len: Number of prompt tokens, used to cut the prompt off
            ``output.sequences`` before decoding.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches
        neither. ``prob_of_risk`` is a float, or ``None`` when
        ``nlogprobs <= 0`` and no probability was computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generated position except the last one.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of get_probabilities' result is the unsafe-token entry.
        # Converting here keeps the None case from hitting ``.item()``.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    generated = output.sequences[:, input_len:][0]
    res = tokenizer.decode(generated, skip_special_tokens=True).strip().lower()
    if res == unsafe_token.lower():
        label = unsafe_token
    elif res == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Aggregate top-k token scores into safe/unsafe probabilities.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generated
            position; ``values`` and ``indices`` are read from row 0 only.

    Returns:
        A 2-element tensor ``[p_safe, p_unsafe]`` that sums to 1.
    """
    safe_target = safe_token.lower()
    unsafe_target = unsafe_token.lower()
    # Start from a tiny positive mass so math.log never receives zero.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50
    for gen_token_i in logprobs:
        values = gen_token_i.values.tolist()[0]
        indices = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(values, indices):
            decoded_token = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if decoded_token == safe_target:
                safe_token_prob += math.exp(logprob)
            if decoded_token == unsafe_target:
                unsafe_token_prob += math.exp(logprob)
    # Softmax over the logs is the two masses renormalised to sum to one.
    return torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]),
        dim=0,
    )
# Load the guardian checkpoint and its tokenizer once; the usages below reuse both.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length, so parse_output can cut the prompt off the generated sequence.
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    # Greedy decoding; per-step scores are kept so parse_output can derive a probability.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes `video_id`, which is not among the declared
# parameters (`aweme_id`, `cursor`, `count`) — presumably the mismatch this
# example is meant to have flagged; confirm before "fixing" it.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings that parse_output compares the decoded generation against.
safe_token = "No"
unsafe_token = "Yes"
# `k` passed to torch.topk for every generated position in parse_output.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            only ``scores`` and ``sequences`` are read.
        input_len: Number of prompt tokens, used to cut the prompt off
            ``output.sequences`` before decoding.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches
        neither. ``prob_of_risk`` is a float, or ``None`` when
        ``nlogprobs <= 0`` and no probability was computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generated position except the last one.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of get_probabilities' result is the unsafe-token entry.
        # Converting here keeps the None case from hitting ``.item()``.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    generated = output.sequences[:, input_len:][0]
    res = tokenizer.decode(generated, skip_special_tokens=True).strip().lower()
    if res == unsafe_token.lower():
        label = unsafe_token
    elif res == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Aggregate top-k token scores into safe/unsafe probabilities.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generated
            position; ``values`` and ``indices`` are read from row 0 only.

    Returns:
        A 2-element tensor ``[p_safe, p_unsafe]`` that sums to 1.
    """
    safe_target = safe_token.lower()
    unsafe_target = unsafe_token.lower()
    # Start from a tiny positive mass so math.log never receives zero.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50
    for gen_token_i in logprobs:
        values = gen_token_i.values.tolist()[0]
        indices = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(values, indices):
            decoded_token = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if decoded_token == safe_target:
                safe_token_prob += math.exp(logprob)
            if decoded_token == unsafe_target:
                unsafe_token_prob += math.exp(logprob)
    # Softmax over the logs is the two masses renormalised to sum to one.
    return torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]),
        dim=0,
    )
# Load the guardian checkpoint and its tokenizer once; the usages below reuse both.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length, so parse_output can cut the prompt off the generated sequence.
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    # Greedy decoding; per-step scores are kept so parse_output can derive a probability.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes `video_id`, which is not among the declared
# parameters (`aweme_id`, `cursor`, `count`) — presumably the mismatch this
# example is meant to have flagged; confirm before "fixing" it.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings that parse_output compares the decoded generation against.
safe_token = "No"
unsafe_token = "Yes"
# `k` passed to torch.topk for every generated position in parse_output.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            only ``scores`` and ``sequences`` are read.
        input_len: Number of prompt tokens, used to cut the prompt off
            ``output.sequences`` before decoding.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches
        neither. ``prob_of_risk`` is a float, or ``None`` when
        ``nlogprobs <= 0`` and no probability was computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generated position except the last one.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of get_probabilities' result is the unsafe-token entry.
        # Converting here keeps the None case from hitting ``.item()``.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    generated = output.sequences[:, input_len:][0]
    res = tokenizer.decode(generated, skip_special_tokens=True).strip().lower()
    if res == unsafe_token.lower():
        label = unsafe_token
    elif res == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Aggregate top-k token scores into safe/unsafe probabilities.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generated
            position; ``values`` and ``indices`` are read from row 0 only.

    Returns:
        A 2-element tensor ``[p_safe, p_unsafe]`` that sums to 1.
    """
    safe_target = safe_token.lower()
    unsafe_target = unsafe_token.lower()
    # Start from a tiny positive mass so math.log never receives zero.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50
    for gen_token_i in logprobs:
        values = gen_token_i.values.tolist()[0]
        indices = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(values, indices):
            decoded_token = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if decoded_token == safe_target:
                safe_token_prob += math.exp(logprob)
            if decoded_token == unsafe_target:
                unsafe_token_prob += math.exp(logprob)
    # Softmax over the logs is the two masses renormalised to sum to one.
    return torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]),
        dim=0,
    )
# Load the guardian checkpoint and its tokenizer once; the usages below reuse both.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length, so parse_output can cut the prompt off the generated sequence.
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    # Greedy decoding; per-step scores are kept so parse_output can derive a probability.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes `video_id`, which is not among the declared
# parameters (`aweme_id`, `cursor`, `count`) — presumably the mismatch this
# example is meant to have flagged; confirm before "fixing" it.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings that parse_output compares the decoded generation against.
safe_token = "No"
unsafe_token = "Yes"
# `k` passed to torch.topk for every generated position in parse_output.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            only ``scores`` and ``sequences`` are read.
        input_len: Number of prompt tokens, used to cut the prompt off
            ``output.sequences`` before decoding.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches
        neither. ``prob_of_risk`` is a float, or ``None`` when
        ``nlogprobs <= 0`` and no probability was computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generated position except the last one.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of get_probabilities' result is the unsafe-token entry.
        # Converting here keeps the None case from hitting ``.item()``.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    generated = output.sequences[:, input_len:][0]
    res = tokenizer.decode(generated, skip_special_tokens=True).strip().lower()
    if res == unsafe_token.lower():
        label = unsafe_token
    elif res == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Collapse per-step top-k candidates into a [safe, unsafe] probability pair.

    Args:
        logprobs: Iterable of per-step results exposing ``values`` and
            ``indices`` (``parse_output`` passes ``torch.topk`` results); only
            row 0 of each is read.

    Returns:
        1-D tensor with two entries, safe first and unsafe second, produced by
        a softmax over the logs of the two accumulated masses.
    """
    # Tiny non-zero seeds keep math.log defined when a token never shows up.
    mass_safe = 1e-50
    mass_unsafe = 1e-50

    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_values, step_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_token.lower():
                mass_safe += math.exp(score)
            if candidate == unsafe_token.lower():
                mass_unsafe += math.exp(score)

    log_masses = torch.tensor([math.log(mass_safe), math.log(mass_unsafe)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint identifier shared by the model and tokenizer loaders below.
model_path = "ibm-granite/granite-guardian-3.1-8b"
# Weights are requested in bfloat16; placement is delegated to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]

model.eval()
# Sampling is disabled; scores are requested because parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives December 24, 1922 as the screening date, while the context says July 16, 1964.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]

model.eval()
# Sampling is disabled; scores are requested because parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    },
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes "video_id" although the tool declares "aweme_id"; presumably
# this mismatch is what the example expects to be flagged (expected label: Yes).
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    },
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]

model.eval()
# Sampling is disabled; scores are requested because parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings that parse_output matches against the decoded generation.
safe_token, unsafe_token = "No", "Yes"
# k handed to torch.topk for each generation step in parse_output.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a (label, probability-of-risk) pair.

    Args:
        output: Result of ``model.generate`` as called in this file (with
            ``return_dict_in_generate=True`` and ``output_scores=True``); its
            ``sequences`` and ``scores`` attributes are read.
        input_len: Number of prompt tokens; everything after this offset in
            ``output.sequences`` is decoded as the verdict.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token`` or ``"Failed"`` when the decoded text matches neither.
        ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs`` is not
        positive and therefore no scores were inspected.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The final generation step is left out of the score inspection.
        topk_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # get_probabilities returns [safe, unsafe]; index 1 is the risk entry.
        prob_of_risk = get_probabilities(topk_per_step)[1].item()

    # Decode only the newly generated tokens of the first sequence.
    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    verdict = res.lower()
    if verdict == unsafe_token.lower():
        label = unsafe_token
    elif verdict == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    # Previously this called .item() unconditionally and crashed on None.
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Collapse per-step top-k candidates into a [safe, unsafe] probability pair.

    Args:
        logprobs: Iterable of per-step results exposing ``values`` and
            ``indices`` (``parse_output`` passes ``torch.topk`` results); only
            row 0 of each is read.

    Returns:
        1-D tensor with two entries, safe first and unsafe second, produced by
        a softmax over the logs of the two accumulated masses.
    """
    # Tiny non-zero seeds keep math.log defined when a token never shows up.
    mass_safe = 1e-50
    mass_unsafe = 1e-50

    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_values, step_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_token.lower():
                mass_safe += math.exp(score)
            if candidate == unsafe_token.lower():
                mass_unsafe += math.exp(score)

    log_masses = torch.tensor([math.log(mass_safe), math.log(mass_unsafe)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint identifier shared by the model and tokenizer loaders below.
model_path = "ibm-granite/granite-guardian-3.1-8b"
# Weights are requested in bfloat16; placement is delegated to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]

model.eval()
# Sampling is disabled; scores are requested because parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives December 24, 1922 as the screening date, while the context says July 16, 1964.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]

model.eval()
# Sampling is disabled; scores are requested because parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    },
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes "video_id" although the tool declares "aweme_id"; presumably
# this mismatch is what the example expects to be flagged (expected label: Yes).
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    },
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]

model.eval()
# Sampling is disabled; scores are requested because parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings that parse_output matches against the decoded generation.
safe_token, unsafe_token = "No", "Yes"
# k handed to torch.topk for each generation step in parse_output.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a (label, probability-of-risk) pair.

    Args:
        output: Result of ``model.generate`` as called in this file (with
            ``return_dict_in_generate=True`` and ``output_scores=True``); its
            ``sequences`` and ``scores`` attributes are read.
        input_len: Number of prompt tokens; everything after this offset in
            ``output.sequences`` is decoded as the verdict.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token`` or ``"Failed"`` when the decoded text matches neither.
        ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs`` is not
        positive and therefore no scores were inspected.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The final generation step is left out of the score inspection.
        topk_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # get_probabilities returns [safe, unsafe]; index 1 is the risk entry.
        prob_of_risk = get_probabilities(topk_per_step)[1].item()

    # Decode only the newly generated tokens of the first sequence.
    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    verdict = res.lower()
    if verdict == unsafe_token.lower():
        label = unsafe_token
    elif verdict == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    # Previously this called .item() unconditionally and crashed on None.
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Collapse per-step top-k candidates into a [safe, unsafe] probability pair.

    Args:
        logprobs: Iterable of per-step results exposing ``values`` and
            ``indices`` (``parse_output`` passes ``torch.topk`` results); only
            row 0 of each is read.

    Returns:
        1-D tensor with two entries, safe first and unsafe second, produced by
        a softmax over the logs of the two accumulated masses.
    """
    # Tiny non-zero seeds keep math.log defined when a token never shows up.
    mass_safe = 1e-50
    mass_unsafe = 1e-50

    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_values, step_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_token.lower():
                mass_safe += math.exp(score)
            if candidate == unsafe_token.lower():
                mass_unsafe += math.exp(score)

    log_masses = torch.tensor([math.log(mass_safe), math.log(mass_unsafe)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint identifier shared by the model and tokenizer loaders below.
model_path = "ibm-granite/granite-guardian-3.1-8b"
# Weights are requested in bfloat16; placement is delegated to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]

model.eval()
# Sampling is disabled; scores are requested because parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives December 24, 1922 as the screening date, while the context says July 16, 1964.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]

model.eval()
# Sampling is disabled; scores are requested because parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    },
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes "video_id" although the tool declares "aweme_id"; presumably
# this mismatch is what the example expects to be flagged (expected label: Yes).
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    },
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]

model.eval()
# Sampling is disabled; scores are requested because parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings that parse_output matches against the decoded generation.
safe_token, unsafe_token = "No", "Yes"
# k handed to torch.topk for each generation step in parse_output.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a (label, probability-of-risk) pair.

    Args:
        output: Result of ``model.generate`` as called in this file (with
            ``return_dict_in_generate=True`` and ``output_scores=True``); its
            ``sequences`` and ``scores`` attributes are read.
        input_len: Number of prompt tokens; everything after this offset in
            ``output.sequences`` is decoded as the verdict.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token`` or ``"Failed"`` when the decoded text matches neither.
        ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs`` is not
        positive and therefore no scores were inspected.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The final generation step is left out of the score inspection.
        topk_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # get_probabilities returns [safe, unsafe]; index 1 is the risk entry.
        prob_of_risk = get_probabilities(topk_per_step)[1].item()

    # Decode only the newly generated tokens of the first sequence.
    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    verdict = res.lower()
    if verdict == unsafe_token.lower():
        label = unsafe_token
    elif verdict == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    # Previously this called .item() unconditionally and crashed on None.
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Collapse per-step top-k candidates into a [safe, unsafe] probability pair.

    Args:
        logprobs: Iterable of per-step results exposing ``values`` and
            ``indices`` (``parse_output`` passes ``torch.topk`` results); only
            row 0 of each is read.

    Returns:
        1-D tensor with two entries, safe first and unsafe second, produced by
        a softmax over the logs of the two accumulated masses.
    """
    # Tiny non-zero seeds keep math.log defined when a token never shows up.
    mass_safe = 1e-50
    mass_unsafe = 1e-50

    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_values, step_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_token.lower():
                mass_safe += math.exp(score)
            if candidate == unsafe_token.lower():
                mass_unsafe += math.exp(score)

    log_masses = torch.tensor([math.log(mass_safe), math.log(mass_unsafe)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint identifier shared by the model and tokenizer loaders below.
model_path = "ibm-granite/granite-guardian-3.1-8b"
# Weights are requested in bfloat16; placement is delegated to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]

model.eval()
# Sampling is disabled; scores are requested because parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives December 24, 1922 as the screening date, while the context says July 16, 1964.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]

model.eval()
# Sampling is disabled; scores are requested because parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    },
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes "video_id" although the tool declares "aweme_id"; presumably
# this mismatch is what the example expects to be flagged (expected label: Yes).
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    },
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]

model.eval()
# Sampling is disabled; scores are requested because parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings that parse_output matches against the decoded generation.
safe_token, unsafe_token = "No", "Yes"
# k handed to torch.topk for each generation step in parse_output.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a (label, probability-of-risk) pair.

    Args:
        output: Result of ``model.generate`` as called in this file (with
            ``return_dict_in_generate=True`` and ``output_scores=True``); its
            ``sequences`` and ``scores`` attributes are read.
        input_len: Number of prompt tokens; everything after this offset in
            ``output.sequences`` is decoded as the verdict.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token`` or ``"Failed"`` when the decoded text matches neither.
        ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs`` is not
        positive and therefore no scores were inspected.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The final generation step is left out of the score inspection.
        topk_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # get_probabilities returns [safe, unsafe]; index 1 is the risk entry.
        prob_of_risk = get_probabilities(topk_per_step)[1].item()

    # Decode only the newly generated tokens of the first sequence.
    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    verdict = res.lower()
    if verdict == unsafe_token.lower():
        label = unsafe_token
    elif verdict == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    # Previously this called .item() unconditionally and crashed on None.
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Collapse per-step top-k candidates into a [safe, unsafe] probability pair.

    Args:
        logprobs: Iterable of per-step results exposing ``values`` and
            ``indices`` (``parse_output`` passes ``torch.topk`` results); only
            row 0 of each is read.

    Returns:
        1-D tensor with two entries, safe first and unsafe second, produced by
        a softmax over the logs of the two accumulated masses.
    """
    # Tiny non-zero seeds keep math.log defined when a token never shows up.
    mass_safe = 1e-50
    mass_unsafe = 1e-50

    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_values, step_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_token.lower():
                mass_safe += math.exp(score)
            if candidate == unsafe_token.lower():
                mass_unsafe += math.exp(score)

    log_masses = torch.tensor([math.log(mass_safe), math.log(mass_unsafe)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint identifier shared by the model and tokenizer loaders below.
model_path = "ibm-granite/granite-guardian-3.1-8b"
# Weights are requested in bfloat16; placement is delegated to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]

model.eval()
# Sampling is disabled; scores are requested because parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives December 24, 1922 as the screening date, while the context says July 16, 1964.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]

model.eval()
# Sampling is disabled; scores are requested because parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    },
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes "video_id" although the tool declares "aweme_id"; presumably
# this mismatch is what the example expects to be flagged (expected label: Yes).
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    },
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]

model.eval()
# Sampling is disabled; scores are requested because parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings that parse_output matches against the decoded generation.
safe_token, unsafe_token = "No", "Yes"
# k handed to torch.topk for each generation step in parse_output.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a (label, probability-of-risk) pair.

    Args:
        output: Result of ``model.generate`` as called in this file (with
            ``return_dict_in_generate=True`` and ``output_scores=True``); its
            ``sequences`` and ``scores`` attributes are read.
        input_len: Number of prompt tokens; everything after this offset in
            ``output.sequences`` is decoded as the verdict.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token`` or ``"Failed"`` when the decoded text matches neither.
        ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs`` is not
        positive and therefore no scores were inspected.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The final generation step is left out of the score inspection.
        topk_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # get_probabilities returns [safe, unsafe]; index 1 is the risk entry.
        prob_of_risk = get_probabilities(topk_per_step)[1].item()

    # Decode only the newly generated tokens of the first sequence.
    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    verdict = res.lower()
    if verdict == unsafe_token.lower():
        label = unsafe_token
    elif verdict == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    # Previously this called .item() unconditionally and crashed on None.
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Collapse per-step top-k scores into a two-entry [safe, unsafe] tensor.

    Args:
        logprobs: Iterable of ``torch.topk`` results; only the first row of
            each result's ``values`` and ``indices`` is read.

    Returns:
        A 1-D tensor ``[p_safe, p_unsafe]`` produced by a softmax over the
        logs of the two accumulated masses.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny positive floor keeps math.log defined when a token never appears.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_key:
                safe_mass += math.exp(score)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(score)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint name handed to both from_pretrained calls below.
model_path = "ibm-granite/granite-guardian-3.1-8b"
# Weights are requested in bfloat16 with device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="auto", torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
# Render the conversation plus guardian_config through the chat template and
# move the resulting token ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length in tokens; parse_output uses it to skip the prompt when decoding.
input_len = input_ids.shape[1]
model.eval()
# Sampling is disabled and gradients are not tracked; per-step scores are
# requested because parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives December 24, 1922 as the screening date, while the context
# says July 16, 1964; the expected verdict noted below is "Yes".
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
# Render the conversation plus guardian_config through the chat template and
# move the resulting token ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length in tokens; parse_output uses it to skip the prompt when decoding.
input_len = input_ids.shape[1]
model.eval()
# Sampling is disabled and gradients are not tracked; per-step scores are
# requested because parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
# Tool catalogue shown to the guardian; note the ID parameter is named "aweme_id".
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# Assistant call under test: it passes "video_id", a name the tool spec above
# does not declare; the expected verdict noted below is "Yes".
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
# Render the conversation plus guardian_config through the chat template and
# move the resulting token ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length in tokens; parse_output uses it to skip the prompt when decoding.
input_len = input_ids.shape[1]
model.eval()
# Sampling is disabled and gradients are not tracked; per-step scores are
# requested because parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict words that parse_output compares (case-insensitively) against the decoded generation.
safe_token = "No"
unsafe_token = "Yes"
# k passed to torch.topk for every generation step in parse_output.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Object returned by ``model.generate`` with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens, used to slice the newly
            generated tokens out of ``output.sequences``.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches
        neither. ``prob_of_risk`` is a float, or ``None`` when
        ``nlogprobs`` is not positive and no probability was computed
        (previously this case crashed on ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k scores of every generation step except the last one.
        top_scores_per_step = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        # get_probabilities returns [safe, unsafe]; index 1 is the unsafe entry.
        prob_of_risk = get_probabilities(top_scores_per_step)[1].item()

    # Decode only the tokens generated after the prompt (first sequence).
    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    verdict = res.lower()
    if verdict == unsafe_token.lower():
        label = unsafe_token
    elif verdict == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Collapse per-step top-k scores into a two-entry [safe, unsafe] tensor.

    Args:
        logprobs: Iterable of ``torch.topk`` results; only the first row of
            each result's ``values`` and ``indices`` is read.

    Returns:
        A 1-D tensor ``[p_safe, p_unsafe]`` produced by a softmax over the
        logs of the two accumulated masses.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny positive floor keeps math.log defined when a token never appears.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_key:
                safe_mass += math.exp(score)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(score)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint name handed to both from_pretrained calls below.
model_path = "ibm-granite/granite-guardian-3.1-8b"
# Weights are requested in bfloat16 with device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="auto", torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
# Render the conversation plus guardian_config through the chat template and
# move the resulting token ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length in tokens; parse_output uses it to skip the prompt when decoding.
input_len = input_ids.shape[1]
model.eval()
# Sampling is disabled and gradients are not tracked; per-step scores are
# requested because parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives December 24, 1922 as the screening date, while the context
# says July 16, 1964; the expected verdict noted below is "Yes".
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
# Render the conversation plus guardian_config through the chat template and
# move the resulting token ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length in tokens; parse_output uses it to skip the prompt when decoding.
input_len = input_ids.shape[1]
model.eval()
# Sampling is disabled and gradients are not tracked; per-step scores are
# requested because parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
# Tool catalogue shown to the guardian; note the ID parameter is named "aweme_id".
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# Assistant call under test: it passes "video_id", a name the tool spec above
# does not declare; the expected verdict noted below is "Yes".
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
# Render the conversation plus guardian_config through the chat template and
# move the resulting token ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length in tokens; parse_output uses it to skip the prompt when decoding.
input_len = input_ids.shape[1]
model.eval()
# Sampling is disabled and gradients are not tracked; per-step scores are
# requested because parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict words that parse_output compares (case-insensitively) against the decoded generation.
safe_token = "No"
unsafe_token = "Yes"
# k passed to torch.topk for every generation step in parse_output.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Object returned by ``model.generate`` with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens, used to slice the newly
            generated tokens out of ``output.sequences``.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches
        neither. ``prob_of_risk`` is a float, or ``None`` when
        ``nlogprobs`` is not positive and no probability was computed
        (previously this case crashed on ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k scores of every generation step except the last one.
        top_scores_per_step = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        # get_probabilities returns [safe, unsafe]; index 1 is the unsafe entry.
        prob_of_risk = get_probabilities(top_scores_per_step)[1].item()

    # Decode only the tokens generated after the prompt (first sequence).
    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    verdict = res.lower()
    if verdict == unsafe_token.lower():
        label = unsafe_token
    elif verdict == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Collapse per-step top-k scores into a two-entry [safe, unsafe] tensor.

    Args:
        logprobs: Iterable of ``torch.topk`` results; only the first row of
            each result's ``values`` and ``indices`` is read.

    Returns:
        A 1-D tensor ``[p_safe, p_unsafe]`` produced by a softmax over the
        logs of the two accumulated masses.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny positive floor keeps math.log defined when a token never appears.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_key:
                safe_mass += math.exp(score)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(score)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint name handed to both from_pretrained calls below.
model_path = "ibm-granite/granite-guardian-3.1-8b"
# Weights are requested in bfloat16 with device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="auto", torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
# Render the conversation plus guardian_config through the chat template and
# move the resulting token ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length in tokens; parse_output uses it to skip the prompt when decoding.
input_len = input_ids.shape[1]
model.eval()
# Sampling is disabled and gradients are not tracked; per-step scores are
# requested because parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives December 24, 1922 as the screening date, while the context
# says July 16, 1964; the expected verdict noted below is "Yes".
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
# Render the conversation plus guardian_config through the chat template and
# move the resulting token ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length in tokens; parse_output uses it to skip the prompt when decoding.
input_len = input_ids.shape[1]
model.eval()
# Sampling is disabled and gradients are not tracked; per-step scores are
# requested because parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
# Tool catalogue shown to the guardian; note the ID parameter is named "aweme_id".
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# Assistant call under test: it passes "video_id", a name the tool spec above
# does not declare; the expected verdict noted below is "Yes".
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
# Render the conversation plus guardian_config through the chat template and
# move the resulting token ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length in tokens; parse_output uses it to skip the prompt when decoding.
input_len = input_ids.shape[1]
model.eval()
# Sampling is disabled and gradients are not tracked; per-step scores are
# requested because parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict words that parse_output compares (case-insensitively) against the decoded generation.
safe_token = "No"
unsafe_token = "Yes"
# k passed to torch.topk for every generation step in parse_output.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Object returned by ``model.generate`` with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens, used to slice the newly
            generated tokens out of ``output.sequences``.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches
        neither. ``prob_of_risk`` is a float, or ``None`` when
        ``nlogprobs`` is not positive and no probability was computed
        (previously this case crashed on ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k scores of every generation step except the last one.
        top_scores_per_step = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        # get_probabilities returns [safe, unsafe]; index 1 is the unsafe entry.
        prob_of_risk = get_probabilities(top_scores_per_step)[1].item()

    # Decode only the tokens generated after the prompt (first sequence).
    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    verdict = res.lower()
    if verdict == unsafe_token.lower():
        label = unsafe_token
    elif verdict == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Collapse per-step top-k scores into a two-entry [safe, unsafe] tensor.

    Args:
        logprobs: Iterable of ``torch.topk`` results; only the first row of
            each result's ``values`` and ``indices`` is read.

    Returns:
        A 1-D tensor ``[p_safe, p_unsafe]`` produced by a softmax over the
        logs of the two accumulated masses.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny positive floor keeps math.log defined when a token never appears.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_key:
                safe_mass += math.exp(score)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(score)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint name handed to both from_pretrained calls below.
model_path = "ibm-granite/granite-guardian-3.1-8b"
# Weights are requested in bfloat16 with device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="auto", torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
# Render the conversation plus guardian_config through the chat template and
# move the resulting token ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length in tokens; parse_output uses it to skip the prompt when decoding.
input_len = input_ids.shape[1]
model.eval()
# Sampling is disabled and gradients are not tracked; per-step scores are
# requested because parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives December 24, 1922 as the screening date, while the context
# says July 16, 1964; the expected verdict noted below is "Yes".
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
# Render the conversation plus guardian_config through the chat template and
# move the resulting token ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length in tokens; parse_output uses it to skip the prompt when decoding.
input_len = input_ids.shape[1]
model.eval()
# Sampling is disabled and gradients are not tracked; per-step scores are
# requested because parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
# Tool catalogue shown to the guardian; note the ID parameter is named "aweme_id".
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# Assistant call under test: it passes "video_id", a name the tool spec above
# does not declare; the expected verdict noted below is "Yes".
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
# Render the conversation plus guardian_config through the chat template and
# move the resulting token ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length in tokens; parse_output uses it to skip the prompt when decoding.
input_len = input_ids.shape[1]
model.eval()
# Sampling is disabled and gradients are not tracked; per-step scores are
# requested because parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict words that parse_output compares (case-insensitively) against the decoded generation.
safe_token = "No"
unsafe_token = "Yes"
# k passed to torch.topk for every generation step in parse_output.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Object returned by ``model.generate`` with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens, used to slice the newly
            generated tokens out of ``output.sequences``.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches
        neither. ``prob_of_risk`` is a float, or ``None`` when
        ``nlogprobs`` is not positive and no probability was computed
        (previously this case crashed on ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k scores of every generation step except the last one.
        top_scores_per_step = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        # get_probabilities returns [safe, unsafe]; index 1 is the unsafe entry.
        prob_of_risk = get_probabilities(top_scores_per_step)[1].item()

    # Decode only the tokens generated after the prompt (first sequence).
    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    verdict = res.lower()
    if verdict == unsafe_token.lower():
        label = unsafe_token
    elif verdict == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Collapse per-step top-k scores into a two-entry [safe, unsafe] tensor.

    Args:
        logprobs: Iterable of ``torch.topk`` results; only the first row of
            each result's ``values`` and ``indices`` is read.

    Returns:
        A 1-D tensor ``[p_safe, p_unsafe]`` produced by a softmax over the
        logs of the two accumulated masses.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny positive floor keeps math.log defined when a token never appears.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_key:
                safe_mass += math.exp(score)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(score)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint name handed to both from_pretrained calls below.
model_path = "ibm-granite/granite-guardian-3.1-8b"
# Weights are requested in bfloat16 with device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="auto", torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
# Render the conversation plus guardian_config through the chat template and
# move the resulting token ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length in tokens; parse_output uses it to skip the prompt when decoding.
input_len = input_ids.shape[1]
model.eval()
# Sampling is disabled and gradients are not tracked; per-step scores are
# requested because parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives December 24, 1922 as the screening date, while the context
# says July 16, 1964; the expected verdict noted below is "Yes".
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
# Render the conversation plus guardian_config through the chat template and
# move the resulting token ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length in tokens; parse_output uses it to skip the prompt when decoding.
input_len = input_ids.shape[1]
model.eval()
# Sampling is disabled and gradients are not tracked; per-step scores are
# requested because parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
# Tool catalogue shown to the guardian; note the ID parameter is named "aweme_id".
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# Assistant call under test: it passes "video_id", a name the tool spec above
# does not declare; the expected verdict noted below is "Yes".
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
# Render the conversation plus guardian_config through the chat template and
# move the resulting token ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length in tokens; parse_output uses it to skip the prompt when decoding.
input_len = input_ids.shape[1]
model.eval()
# Sampling is disabled and gradients are not tracked; per-step scores are
# requested because parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict words that parse_output compares (case-insensitively) against the decoded generation.
safe_token = "No"
unsafe_token = "Yes"
# k passed to torch.topk for every generation step in parse_output.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Object returned by ``model.generate`` with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens, used to slice the newly
            generated tokens out of ``output.sequences``.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches
        neither. ``prob_of_risk`` is a float, or ``None`` when
        ``nlogprobs`` is not positive and no probability was computed
        (previously this case crashed on ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k scores of every generation step except the last one.
        top_scores_per_step = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        # get_probabilities returns [safe, unsafe]; index 1 is the unsafe entry.
        prob_of_risk = get_probabilities(top_scores_per_step)[1].item()

    # Decode only the tokens generated after the prompt (first sequence).
    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    verdict = res.lower()
    if verdict == unsafe_token.lower():
        label = unsafe_token
    elif verdict == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Return a two-element tensor ``[p_safe, p_unsafe]`` that sums to one.

    Args:
        logprobs: Iterable of per-step top-k results, each exposing
            ``values`` and ``indices``; only row 0 of each is read.

    Returns:
        ``torch.softmax`` over the log of the accumulated safe and unsafe
        masses, in that order.

    NOTE(review): the values are exponentiated as though they were
    log-probabilities -- confirm the scores handed in are normalised.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a verdict token never shows up.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_values, step_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_key:
                safe_mass += math.exp(score)
            if candidate == unsafe_key:
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Repository id shared by the model weights and the tokenizer.
model_path = "ibm-granite/granite-guardian-3.1-8b"

# Weights are requested in bfloat16 with automatic device mapping.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

# Conversation to judge: the user prompt followed by the assistant reply.
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]

# `harm` is the default risk definition; it is applied when no config is specified.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length, used by parse_output to skip the prompt when decoding.
input_len = input_ids.shape[1]

# Evaluation mode, no gradient tracking while generating.
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # expected: Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # expected: 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# Note: the context dates the first screening to July 16, 1964, so the date
# given in this reply is not grounded in it.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

# The reply is judged against a context turn instead of a user turn.
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length, used by parse_output to skip the prompt when decoding.
input_len = input_ids.shape[1]

# Evaluation mode, no gradient tracking while generating.
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # expected: Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # expected: 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
# Tool catalogue shown to the model: one function and its declared parameters.
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# Note: the call passes `video_id`, which is not among the declared
# parameters (`aweme_id`, `cursor`, `count`).
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    }
]

messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length, used by parse_output to skip the prompt when decoding.
input_len = input_ids.shape[1]

# Evaluation mode, no gradient tracking while generating.
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # expected: Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # expected: 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings compared (case-insensitively) against the model's decoded answer.
safe_token = "No"
unsafe_token = "Yes"
# Passed as k to torch.topk in parse_output: candidate tokens kept per generation step.
nlogprobs = 20
def parse_output(output, input_len):
    """Convert a ``model.generate`` result into a verdict and a risk probability.

    Args:
        output: Generation result exposing ``scores`` (iterated one entry per
            step) and ``sequences`` (prompt plus generated ids), as requested
            with ``return_dict_in_generate=True, output_scores=True``.
        input_len: Number of prompt tokens; only ids after this offset are
            decoded as the model's answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded answer matches
        neither. ``prob_of_risk`` is a Python float, or ``None`` when
        ``nlogprobs`` is not positive and no probability was computed
        (previously this case raised ``AttributeError`` on ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Keep the top-k candidates of every step except the last one, which
        # is left out of the estimate (presumably the end-of-sequence step).
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # get_probabilities returns [safe, unsafe]; index 1 is the risk entry.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    # Only the first sequence of the batch is decoded.
    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Return a two-element tensor ``[p_safe, p_unsafe]`` that sums to one.

    Args:
        logprobs: Iterable of per-step top-k results, each exposing
            ``values`` and ``indices``; only row 0 of each is read.

    Returns:
        ``torch.softmax`` over the log of the accumulated safe and unsafe
        masses, in that order.

    NOTE(review): the values are exponentiated as though they were
    log-probabilities -- confirm the scores handed in are normalised.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a verdict token never shows up.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_values, step_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_key:
                safe_mass += math.exp(score)
            if candidate == unsafe_key:
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Repository id shared by the model weights and the tokenizer.
model_path = "ibm-granite/granite-guardian-3.1-8b"

# Weights are requested in bfloat16 with automatic device mapping.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

# Conversation to judge: the user prompt followed by the assistant reply.
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]

# `harm` is the default risk definition; it is applied when no config is specified.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length, used by parse_output to skip the prompt when decoding.
input_len = input_ids.shape[1]

# Evaluation mode, no gradient tracking while generating.
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # expected: Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # expected: 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# Note: the context dates the first screening to July 16, 1964, so the date
# given in this reply is not grounded in it.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

# The reply is judged against a context turn instead of a user turn.
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length, used by parse_output to skip the prompt when decoding.
input_len = input_ids.shape[1]

# Evaluation mode, no gradient tracking while generating.
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # expected: Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # expected: 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
# Tool catalogue shown to the model: one function and its declared parameters.
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# Note: the call passes `video_id`, which is not among the declared
# parameters (`aweme_id`, `cursor`, `count`).
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    }
]

messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length, used by parse_output to skip the prompt when decoding.
input_len = input_ids.shape[1]

# Evaluation mode, no gradient tracking while generating.
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # expected: Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # expected: 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings compared (case-insensitively) against the model's decoded answer.
safe_token = "No"
unsafe_token = "Yes"
# Passed as k to torch.topk in parse_output: candidate tokens kept per generation step.
nlogprobs = 20
def parse_output(output, input_len):
    """Convert a ``model.generate`` result into a verdict and a risk probability.

    Args:
        output: Generation result exposing ``scores`` (iterated one entry per
            step) and ``sequences`` (prompt plus generated ids), as requested
            with ``return_dict_in_generate=True, output_scores=True``.
        input_len: Number of prompt tokens; only ids after this offset are
            decoded as the model's answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded answer matches
        neither. ``prob_of_risk`` is a Python float, or ``None`` when
        ``nlogprobs`` is not positive and no probability was computed
        (previously this case raised ``AttributeError`` on ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Keep the top-k candidates of every step except the last one, which
        # is left out of the estimate (presumably the end-of-sequence step).
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # get_probabilities returns [safe, unsafe]; index 1 is the risk entry.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    # Only the first sequence of the batch is decoded.
    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Return a two-element tensor ``[p_safe, p_unsafe]`` that sums to one.

    Args:
        logprobs: Iterable of per-step top-k results, each exposing
            ``values`` and ``indices``; only row 0 of each is read.

    Returns:
        ``torch.softmax`` over the log of the accumulated safe and unsafe
        masses, in that order.

    NOTE(review): the values are exponentiated as though they were
    log-probabilities -- confirm the scores handed in are normalised.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a verdict token never shows up.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_values, step_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_key:
                safe_mass += math.exp(score)
            if candidate == unsafe_key:
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Repository id shared by the model weights and the tokenizer.
model_path = "ibm-granite/granite-guardian-3.1-8b"

# Weights are requested in bfloat16 with automatic device mapping.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

# Conversation to judge: the user prompt followed by the assistant reply.
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]

# `harm` is the default risk definition; it is applied when no config is specified.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length, used by parse_output to skip the prompt when decoding.
input_len = input_ids.shape[1]

# Evaluation mode, no gradient tracking while generating.
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # expected: Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # expected: 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# Note: the context dates the first screening to July 16, 1964, so the date
# given in this reply is not grounded in it.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

# The reply is judged against a context turn instead of a user turn.
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length, used by parse_output to skip the prompt when decoding.
input_len = input_ids.shape[1]

# Evaluation mode, no gradient tracking while generating.
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # expected: Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # expected: 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
# Tool catalogue shown to the model: one function and its declared parameters.
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# Note: the call passes `video_id`, which is not among the declared
# parameters (`aweme_id`, `cursor`, `count`).
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    }
]

messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length, used by parse_output to skip the prompt when decoding.
input_len = input_ids.shape[1]

# Evaluation mode, no gradient tracking while generating.
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # expected: Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # expected: 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings compared (case-insensitively) against the model's decoded answer.
safe_token = "No"
unsafe_token = "Yes"
# Passed as k to torch.topk in parse_output: candidate tokens kept per generation step.
nlogprobs = 20
def parse_output(output, input_len):
    """Convert a ``model.generate`` result into a verdict and a risk probability.

    Args:
        output: Generation result exposing ``scores`` (iterated one entry per
            step) and ``sequences`` (prompt plus generated ids), as requested
            with ``return_dict_in_generate=True, output_scores=True``.
        input_len: Number of prompt tokens; only ids after this offset are
            decoded as the model's answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded answer matches
        neither. ``prob_of_risk`` is a Python float, or ``None`` when
        ``nlogprobs`` is not positive and no probability was computed
        (previously this case raised ``AttributeError`` on ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Keep the top-k candidates of every step except the last one, which
        # is left out of the estimate (presumably the end-of-sequence step).
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # get_probabilities returns [safe, unsafe]; index 1 is the risk entry.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    # Only the first sequence of the batch is decoded.
    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Return a two-element tensor ``[p_safe, p_unsafe]`` that sums to one.

    Args:
        logprobs: Iterable of per-step top-k results, each exposing
            ``values`` and ``indices``; only row 0 of each is read.

    Returns:
        ``torch.softmax`` over the log of the accumulated safe and unsafe
        masses, in that order.

    NOTE(review): the values are exponentiated as though they were
    log-probabilities -- confirm the scores handed in are normalised.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a verdict token never shows up.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_values, step_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_key:
                safe_mass += math.exp(score)
            if candidate == unsafe_key:
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Repository id shared by the model weights and the tokenizer.
model_path = "ibm-granite/granite-guardian-3.1-8b"

# Weights are requested in bfloat16 with automatic device mapping.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

# Conversation to judge: the user prompt followed by the assistant reply.
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]

# `harm` is the default risk definition; it is applied when no config is specified.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length, used by parse_output to skip the prompt when decoding.
input_len = input_ids.shape[1]

# Evaluation mode, no gradient tracking while generating.
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # expected: Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # expected: 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# Note: the context dates the first screening to July 16, 1964, so the date
# given in this reply is not grounded in it.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

# The reply is judged against a context turn instead of a user turn.
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length, used by parse_output to skip the prompt when decoding.
input_len = input_ids.shape[1]

# Evaluation mode, no gradient tracking while generating.
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # expected: Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # expected: 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
# Tool catalogue shown to the model: one function and its declared parameters.
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# Note: the call passes `video_id`, which is not among the declared
# parameters (`aweme_id`, `cursor`, `count`).
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    }
]

messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length, used by parse_output to skip the prompt when decoding.
input_len = input_ids.shape[1]

# Evaluation mode, no gradient tracking while generating.
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # expected: Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # expected: 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings compared (case-insensitively) against the model's decoded answer.
safe_token = "No"
unsafe_token = "Yes"
# Passed as k to torch.topk in parse_output: candidate tokens kept per generation step.
nlogprobs = 20
def parse_output(output, input_len):
    """Convert a ``model.generate`` result into a verdict and a risk probability.

    Args:
        output: Generation result exposing ``scores`` (iterated one entry per
            step) and ``sequences`` (prompt plus generated ids), as requested
            with ``return_dict_in_generate=True, output_scores=True``.
        input_len: Number of prompt tokens; only ids after this offset are
            decoded as the model's answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded answer matches
        neither. ``prob_of_risk`` is a Python float, or ``None`` when
        ``nlogprobs`` is not positive and no probability was computed
        (previously this case raised ``AttributeError`` on ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Keep the top-k candidates of every step except the last one, which
        # is left out of the estimate (presumably the end-of-sequence step).
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # get_probabilities returns [safe, unsafe]; index 1 is the risk entry.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    # Only the first sequence of the batch is decoded.
    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Return a two-element tensor ``[p_safe, p_unsafe]`` that sums to one.

    Args:
        logprobs: Iterable of per-step top-k results, each exposing
            ``values`` and ``indices``; only row 0 of each is read.

    Returns:
        ``torch.softmax`` over the log of the accumulated safe and unsafe
        masses, in that order.

    NOTE(review): the values are exponentiated as though they were
    log-probabilities -- confirm the scores handed in are normalised.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a verdict token never shows up.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_values, step_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_key:
                safe_mass += math.exp(score)
            if candidate == unsafe_key:
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Repository id shared by the model weights and the tokenizer.
model_path = "ibm-granite/granite-guardian-3.1-8b"

# Weights are requested in bfloat16 with automatic device mapping.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

# Conversation to judge: the user prompt followed by the assistant reply.
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]

# `harm` is the default risk definition; it is applied when no config is specified.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length, used by parse_output to skip the prompt when decoding.
input_len = input_ids.shape[1]

# Evaluation mode, no gradient tracking while generating.
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # expected: Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # expected: 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# Note: the context dates the first screening to July 16, 1964, so the date
# given in this reply is not grounded in it.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

# The reply is judged against a context turn instead of a user turn.
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length, used by parse_output to skip the prompt when decoding.
input_len = input_ids.shape[1]

# Evaluation mode, no gradient tracking while generating.
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # expected: Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # expected: 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
# Tool catalogue shown to the model: one function and its declared parameters.
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# Note: the call passes `video_id`, which is not among the declared
# parameters (`aweme_id`, `cursor`, `count`).
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    }
]

messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length, used by parse_output to skip the prompt when decoding.
input_len = input_ids.shape[1]

# Evaluation mode, no gradient tracking while generating.
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # expected: Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # expected: 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings compared (case-insensitively) against the model's decoded answer.
safe_token = "No"
unsafe_token = "Yes"
# Passed as k to torch.topk in parse_output: candidate tokens kept per generation step.
nlogprobs = 20
def parse_output(output, input_len):
    """Convert a ``model.generate`` result into a verdict and a risk probability.

    Args:
        output: Generation result exposing ``scores`` (iterated one entry per
            step) and ``sequences`` (prompt plus generated ids), as requested
            with ``return_dict_in_generate=True, output_scores=True``.
        input_len: Number of prompt tokens; only ids after this offset are
            decoded as the model's answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded answer matches
        neither. ``prob_of_risk`` is a Python float, or ``None`` when
        ``nlogprobs`` is not positive and no probability was computed
        (previously this case raised ``AttributeError`` on ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Keep the top-k candidates of every step except the last one, which
        # is left out of the estimate (presumably the end-of-sequence step).
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # get_probabilities returns [safe, unsafe]; index 1 is the risk entry.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    # Only the first sequence of the batch is decoded.
    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Compare how much score mass the safe and unsafe answers received.

    Args:
        logprobs: Iterable of per-step top-k results; the first row of each
            item's ``values`` and ``indices`` is read.

    Returns:
        A 2-element tensor ``[safe, unsafe]`` produced by a softmax, so the
        two entries sum to 1.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a token never shows up.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for value, token_id in zip(step_values, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_key:
                safe_mass += math.exp(value)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(value)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint shared by the model and its tokenizer.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
# Conversation to judge: the user prompt followed by the assistant reply.
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Offset at which generated tokens start in the output sequences.
input_len = input_ids.shape[1]

model.eval()
# Non-sampling decoding with gradient tracking disabled.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# NOTE(review): the date below contradicts the context (July 16, 1964) —
# presumably the deliberate ungrounded claim this example is meant to flag.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Offset at which generated tokens start in the output sequences.
input_len = input_ids.shape[1]

model.eval()
# Non-sampling decoding with gradient tracking disabled.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes `video_id`, which is not a declared parameter
# of `comment_list` (`aweme_id` is) — presumably the deliberate hallucination
# this example is meant to flag; confirm before "fixing" it.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Offset at which generated tokens start in the output sequences.
input_len = input_ids.shape[1]

model.eval()
# Non-sampling decoding with gradient tracking disabled.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python / transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Answer strings compared against the decoded generation: "Yes" means a risk
# was detected, "No" means it was not.
safe_token, unsafe_token = "No", "Yes"
# How many top-scoring candidates are inspected per generation step.
nlogprobs = 20
def parse_output(output, input_len):
    """Extract the guardian's verdict and risk probability from ``generate`` output.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Offset into ``output.sequences`` where the generated
            tokens start.

    Returns:
        Tuple ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches
        neither. ``prob_of_risk`` is a float, or ``None`` when
        ``nlogprobs`` is not positive and no probability is computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Element 1 of get_probabilities' result is the unsafe-token share.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    if unsafe_token.lower() == res.lower():
        label = unsafe_token
    elif safe_token.lower() == res.lower():
        label = safe_token
    else:
        label = "Failed"

    # prob_of_risk stays None when it was never computed; calling .item()
    # on it unconditionally would raise AttributeError.
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Compare how much score mass the safe and unsafe answers received.

    Args:
        logprobs: Iterable of per-step top-k results; the first row of each
            item's ``values`` and ``indices`` is read.

    Returns:
        A 2-element tensor ``[safe, unsafe]`` produced by a softmax, so the
        two entries sum to 1.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a token never shows up.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for value, token_id in zip(step_values, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_key:
                safe_mass += math.exp(value)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(value)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint shared by the model and its tokenizer.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
# Conversation to judge: the user prompt followed by the assistant reply.
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Offset at which generated tokens start in the output sequences.
input_len = input_ids.shape[1]

model.eval()
# Non-sampling decoding with gradient tracking disabled.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# NOTE(review): the date below contradicts the context (July 16, 1964) —
# presumably the deliberate ungrounded claim this example is meant to flag.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Offset at which generated tokens start in the output sequences.
input_len = input_ids.shape[1]

model.eval()
# Non-sampling decoding with gradient tracking disabled.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes `video_id`, which is not a declared parameter
# of `comment_list` (`aweme_id` is) — presumably the deliberate hallucination
# this example is meant to flag; confirm before "fixing" it.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Offset at which generated tokens start in the output sequences.
input_len = input_ids.shape[1]

model.eval()
# Non-sampling decoding with gradient tracking disabled.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python / transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Answer strings compared against the decoded generation: "Yes" means a risk
# was detected, "No" means it was not.
safe_token, unsafe_token = "No", "Yes"
# How many top-scoring candidates are inspected per generation step.
nlogprobs = 20
def parse_output(output, input_len):
    """Extract the guardian's verdict and risk probability from ``generate`` output.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Offset into ``output.sequences`` where the generated
            tokens start.

    Returns:
        Tuple ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches
        neither. ``prob_of_risk`` is a float, or ``None`` when
        ``nlogprobs`` is not positive and no probability is computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Element 1 of get_probabilities' result is the unsafe-token share.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    if unsafe_token.lower() == res.lower():
        label = unsafe_token
    elif safe_token.lower() == res.lower():
        label = safe_token
    else:
        label = "Failed"

    # prob_of_risk stays None when it was never computed; calling .item()
    # on it unconditionally would raise AttributeError.
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Compare how much score mass the safe and unsafe answers received.

    Args:
        logprobs: Iterable of per-step top-k results; the first row of each
            item's ``values`` and ``indices`` is read.

    Returns:
        A 2-element tensor ``[safe, unsafe]`` produced by a softmax, so the
        two entries sum to 1.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a token never shows up.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for value, token_id in zip(step_values, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_key:
                safe_mass += math.exp(value)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(value)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint shared by the model and its tokenizer.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
# Conversation to judge: the user prompt followed by the assistant reply.
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Offset at which generated tokens start in the output sequences.
input_len = input_ids.shape[1]

model.eval()
# Non-sampling decoding with gradient tracking disabled.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# NOTE(review): the date below contradicts the context (July 16, 1964) —
# presumably the deliberate ungrounded claim this example is meant to flag.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Offset at which generated tokens start in the output sequences.
input_len = input_ids.shape[1]

model.eval()
# Non-sampling decoding with gradient tracking disabled.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes `video_id`, which is not a declared parameter
# of `comment_list` (`aweme_id` is) — presumably the deliberate hallucination
# this example is meant to flag; confirm before "fixing" it.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Offset at which generated tokens start in the output sequences.
input_len = input_ids.shape[1]

model.eval()
# Non-sampling decoding with gradient tracking disabled.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python / transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Answer strings compared against the decoded generation: "Yes" means a risk
# was detected, "No" means it was not.
safe_token, unsafe_token = "No", "Yes"
# How many top-scoring candidates are inspected per generation step.
nlogprobs = 20
def parse_output(output, input_len):
    """Extract the guardian's verdict and risk probability from ``generate`` output.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Offset into ``output.sequences`` where the generated
            tokens start.

    Returns:
        Tuple ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches
        neither. ``prob_of_risk`` is a float, or ``None`` when
        ``nlogprobs`` is not positive and no probability is computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Element 1 of get_probabilities' result is the unsafe-token share.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    if unsafe_token.lower() == res.lower():
        label = unsafe_token
    elif safe_token.lower() == res.lower():
        label = safe_token
    else:
        label = "Failed"

    # prob_of_risk stays None when it was never computed; calling .item()
    # on it unconditionally would raise AttributeError.
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Compare how much score mass the safe and unsafe answers received.

    Args:
        logprobs: Iterable of per-step top-k results; the first row of each
            item's ``values`` and ``indices`` is read.

    Returns:
        A 2-element tensor ``[safe, unsafe]`` produced by a softmax, so the
        two entries sum to 1.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a token never shows up.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for value, token_id in zip(step_values, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_key:
                safe_mass += math.exp(value)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(value)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint shared by the model and its tokenizer.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
# Conversation to judge: the user prompt followed by the assistant reply.
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Offset at which generated tokens start in the output sequences.
input_len = input_ids.shape[1]

model.eval()
# Non-sampling decoding with gradient tracking disabled.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# NOTE(review): the date below contradicts the context (July 16, 1964) —
# presumably the deliberate ungrounded claim this example is meant to flag.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Offset at which generated tokens start in the output sequences.
input_len = input_ids.shape[1]

model.eval()
# Non-sampling decoding with gradient tracking disabled.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes `video_id`, which is not a declared parameter
# of `comment_list` (`aweme_id` is) — presumably the deliberate hallucination
# this example is meant to flag; confirm before "fixing" it.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Offset at which generated tokens start in the output sequences.
input_len = input_ids.shape[1]

model.eval()
# Non-sampling decoding with gradient tracking disabled.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python / transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Answer strings compared against the decoded generation: "Yes" means a risk
# was detected, "No" means it was not.
safe_token, unsafe_token = "No", "Yes"
# How many top-scoring candidates are inspected per generation step.
nlogprobs = 20
def parse_output(output, input_len):
    """Extract the guardian's verdict and risk probability from ``generate`` output.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Offset into ``output.sequences`` where the generated
            tokens start.

    Returns:
        Tuple ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches
        neither. ``prob_of_risk`` is a float, or ``None`` when
        ``nlogprobs`` is not positive and no probability is computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Element 1 of get_probabilities' result is the unsafe-token share.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    if unsafe_token.lower() == res.lower():
        label = unsafe_token
    elif safe_token.lower() == res.lower():
        label = safe_token
    else:
        label = "Failed"

    # prob_of_risk stays None when it was never computed; calling .item()
    # on it unconditionally would raise AttributeError.
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Compare how much score mass the safe and unsafe answers received.

    Args:
        logprobs: Iterable of per-step top-k results; the first row of each
            item's ``values`` and ``indices`` is read.

    Returns:
        A 2-element tensor ``[safe, unsafe]`` produced by a softmax, so the
        two entries sum to 1.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a token never shows up.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for value, token_id in zip(step_values, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_key:
                safe_mass += math.exp(value)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(value)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint shared by the model and its tokenizer.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
# Conversation to judge: the user prompt followed by the assistant reply.
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Offset at which generated tokens start in the output sequences.
input_len = input_ids.shape[1]

model.eval()
# Non-sampling decoding with gradient tracking disabled.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# NOTE(review): the date below contradicts the context (July 16, 1964) —
# presumably the deliberate ungrounded claim this example is meant to flag.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Offset at which generated tokens start in the output sequences.
input_len = input_ids.shape[1]

model.eval()
# Non-sampling decoding with gradient tracking disabled.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes `video_id`, which is not a declared parameter
# of `comment_list` (`aweme_id` is) — presumably the deliberate hallucination
# this example is meant to flag; confirm before "fixing" it.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Offset at which generated tokens start in the output sequences.
input_len = input_ids.shape[1]

model.eval()
# Non-sampling decoding with gradient tracking disabled.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python / transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Answer strings compared against the decoded generation: "Yes" means a risk
# was detected, "No" means it was not.
safe_token, unsafe_token = "No", "Yes"
# How many top-scoring candidates are inspected per generation step.
nlogprobs = 20
def parse_output(output, input_len):
    """Extract the guardian's verdict and risk probability from ``generate`` output.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Offset into ``output.sequences`` where the generated
            tokens start.

    Returns:
        Tuple ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches
        neither. ``prob_of_risk`` is a float, or ``None`` when
        ``nlogprobs`` is not positive and no probability is computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Element 1 of get_probabilities' result is the unsafe-token share.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    if unsafe_token.lower() == res.lower():
        label = unsafe_token
    elif safe_token.lower() == res.lower():
        label = safe_token
    else:
        label = "Failed"

    # prob_of_risk stays None when it was never computed; calling .item()
    # on it unconditionally would raise AttributeError.
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate the safe/unsafe probability pair from top-k candidates.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generated
            position; row 0 of ``values`` and ``indices`` is read.

    Returns:
        A 2-element tensor ``[safe, unsafe]`` produced by a softmax, so the
        two entries sum to 1.
    """
    # NOTE(review): values are exponentiated as if they were log-probabilities;
    # confirm that the scores passed in are on that scale.
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a token never shows up.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50

    for step in logprobs:
        scores = step.values.tolist()[0]
        token_ids = step.indices.tolist()[0]
        for score, token_id in zip(scores, token_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_key:
                safe_token_prob += math.exp(score)
            if candidate == unsafe_key:
                unsafe_token_prob += math.exp(score)

    log_mass = torch.tensor(
        [math.log(safe_token_prob), math.log(unsafe_token_prob)]
    )
    return torch.softmax(log_mass, dim=0)
# Checkpoint identifier passed to both from_pretrained calls below.
model_path = "ibm-granite/granite-guardian-3.1-8b"

# Weights are requested in bfloat16 with device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives Mekas's birth date as the screening date, which the context does not support.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes `video_id`, which is not one of the parameters declared in `tools`.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]

messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# --- Quickstart Example (python, transformers) ---
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict tokens that the decoded model answer is compared against.
safe_token: str = "No"
unsafe_token: str = "Yes"
# Number of top-scoring candidates inspected per generated position (topk `k`).
nlogprobs: int = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            ``output.sequences`` and ``output.scores`` are read.
        input_len: Number of prompt tokens; everything after this offset in
            ``output.sequences`` is decoded as the generated answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded answer matches
        neither. ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs``
        is not positive and no probability was computed (the previous code
        raised ``AttributeError`` in that case).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The score of the final generation step is left out.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 is the unsafe entry of the pair built by get_probabilities.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate the safe/unsafe probability pair from top-k candidates.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generated
            position; row 0 of ``values`` and ``indices`` is read.

    Returns:
        A 2-element tensor ``[safe, unsafe]`` produced by a softmax, so the
        two entries sum to 1.
    """
    # NOTE(review): values are exponentiated as if they were log-probabilities;
    # confirm that the scores passed in are on that scale.
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a token never shows up.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50

    for step in logprobs:
        scores = step.values.tolist()[0]
        token_ids = step.indices.tolist()[0]
        for score, token_id in zip(scores, token_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_key:
                safe_token_prob += math.exp(score)
            if candidate == unsafe_key:
                unsafe_token_prob += math.exp(score)

    log_mass = torch.tensor(
        [math.log(safe_token_prob), math.log(unsafe_token_prob)]
    )
    return torch.softmax(log_mass, dim=0)
# Checkpoint identifier passed to both from_pretrained calls below.
model_path = "ibm-granite/granite-guardian-3.1-8b"

# Weights are requested in bfloat16 with device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives Mekas's birth date as the screening date, which the context does not support.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes `video_id`, which is not one of the parameters declared in `tools`.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]

messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# --- Quickstart Example (python, transformers) ---
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict tokens that the decoded model answer is compared against.
safe_token: str = "No"
unsafe_token: str = "Yes"
# Number of top-scoring candidates inspected per generated position (topk `k`).
nlogprobs: int = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            ``output.sequences`` and ``output.scores`` are read.
        input_len: Number of prompt tokens; everything after this offset in
            ``output.sequences`` is decoded as the generated answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded answer matches
        neither. ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs``
        is not positive and no probability was computed (the previous code
        raised ``AttributeError`` in that case).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The score of the final generation step is left out.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 is the unsafe entry of the pair built by get_probabilities.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate the safe/unsafe probability pair from top-k candidates.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generated
            position; row 0 of ``values`` and ``indices`` is read.

    Returns:
        A 2-element tensor ``[safe, unsafe]`` produced by a softmax, so the
        two entries sum to 1.
    """
    # NOTE(review): values are exponentiated as if they were log-probabilities;
    # confirm that the scores passed in are on that scale.
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a token never shows up.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50

    for step in logprobs:
        scores = step.values.tolist()[0]
        token_ids = step.indices.tolist()[0]
        for score, token_id in zip(scores, token_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_key:
                safe_token_prob += math.exp(score)
            if candidate == unsafe_key:
                unsafe_token_prob += math.exp(score)

    log_mass = torch.tensor(
        [math.log(safe_token_prob), math.log(unsafe_token_prob)]
    )
    return torch.softmax(log_mass, dim=0)
# Checkpoint identifier passed to both from_pretrained calls below.
model_path = "ibm-granite/granite-guardian-3.1-8b"

# Weights are requested in bfloat16 with device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives Mekas's birth date as the screening date, which the context does not support.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes `video_id`, which is not one of the parameters declared in `tools`.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]

messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# --- Quickstart Example (python, transformers) ---
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict tokens that the decoded model answer is compared against.
safe_token: str = "No"
unsafe_token: str = "Yes"
# Number of top-scoring candidates inspected per generated position (topk `k`).
nlogprobs: int = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            ``output.sequences`` and ``output.scores`` are read.
        input_len: Number of prompt tokens; everything after this offset in
            ``output.sequences`` is decoded as the generated answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded answer matches
        neither. ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs``
        is not positive and no probability was computed (the previous code
        raised ``AttributeError`` in that case).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The score of the final generation step is left out.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 is the unsafe entry of the pair built by get_probabilities.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate the safe/unsafe probability pair from top-k candidates.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generated
            position; row 0 of ``values`` and ``indices`` is read.

    Returns:
        A 2-element tensor ``[safe, unsafe]`` produced by a softmax, so the
        two entries sum to 1.
    """
    # NOTE(review): values are exponentiated as if they were log-probabilities;
    # confirm that the scores passed in are on that scale.
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a token never shows up.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50

    for step in logprobs:
        scores = step.values.tolist()[0]
        token_ids = step.indices.tolist()[0]
        for score, token_id in zip(scores, token_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_key:
                safe_token_prob += math.exp(score)
            if candidate == unsafe_key:
                unsafe_token_prob += math.exp(score)

    log_mass = torch.tensor(
        [math.log(safe_token_prob), math.log(unsafe_token_prob)]
    )
    return torch.softmax(log_mass, dim=0)
# Checkpoint identifier passed to both from_pretrained calls below.
model_path = "ibm-granite/granite-guardian-3.1-8b"

# Weights are requested in bfloat16 with device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives Mekas's birth date as the screening date, which the context does not support.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes `video_id`, which is not one of the parameters declared in `tools`.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]

messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# --- Quickstart Example (python, transformers) ---
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict tokens that the decoded model answer is compared against.
safe_token: str = "No"
unsafe_token: str = "Yes"
# Number of top-scoring candidates inspected per generated position (topk `k`).
nlogprobs: int = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            ``output.sequences`` and ``output.scores`` are read.
        input_len: Number of prompt tokens; everything after this offset in
            ``output.sequences`` is decoded as the generated answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded answer matches
        neither. ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs``
        is not positive and no probability was computed (the previous code
        raised ``AttributeError`` in that case).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The score of the final generation step is left out.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 is the unsafe entry of the pair built by get_probabilities.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate the safe/unsafe probability pair from top-k candidates.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generated
            position; row 0 of ``values`` and ``indices`` is read.

    Returns:
        A 2-element tensor ``[safe, unsafe]`` produced by a softmax, so the
        two entries sum to 1.
    """
    # NOTE(review): values are exponentiated as if they were log-probabilities;
    # confirm that the scores passed in are on that scale.
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a token never shows up.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50

    for step in logprobs:
        scores = step.values.tolist()[0]
        token_ids = step.indices.tolist()[0]
        for score, token_id in zip(scores, token_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_key:
                safe_token_prob += math.exp(score)
            if candidate == unsafe_key:
                unsafe_token_prob += math.exp(score)

    log_mass = torch.tensor(
        [math.log(safe_token_prob), math.log(unsafe_token_prob)]
    )
    return torch.softmax(log_mass, dim=0)
# Checkpoint identifier passed to both from_pretrained calls below.
model_path = "ibm-granite/granite-guardian-3.1-8b"

# Weights are requested in bfloat16 with device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives Mekas's birth date as the screening date, which the context does not support.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes `video_id`, which is not one of the parameters declared in `tools`.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]

messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# --- Quickstart Example (python, transformers) ---
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict tokens that the decoded model answer is compared against.
safe_token: str = "No"
unsafe_token: str = "Yes"
# Number of top-scoring candidates inspected per generated position (topk `k`).
nlogprobs: int = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            ``output.sequences`` and ``output.scores`` are read.
        input_len: Number of prompt tokens; everything after this offset in
            ``output.sequences`` is decoded as the generated answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded answer matches
        neither. ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs``
        is not positive and no probability was computed (the previous code
        raised ``AttributeError`` in that case).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The score of the final generation step is left out.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 is the unsafe entry of the pair built by get_probabilities.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Collapse per-step top-k candidates into a [safe, unsafe] probability pair.

    Reads the module-level `tokenizer`, `safe_token` and `unsafe_token`.

    Args:
        logprobs: Iterable of top-k results; each item exposes `.values` and
            `.indices`, of which only row 0 is read. The values are
            exponentiated, i.e. treated as log-probabilities.

    Returns:
        A 1-D tensor of two entries summing to 1: index 0 for the safe token,
        index 1 for the unsafe token.
    """
    # Tiny floor so math.log below never receives zero when a token is absent.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_token_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_token_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_token.lower():
                safe_mass += math.exp(score)
            if candidate == unsafe_token.lower():
                unsafe_mass += math.exp(score)

    # Softmax over the logs of the two masses normalises them to sum to 1.
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint identifier shared by the model and tokenizer loaders below.
model_path = "ibm-granite/granite-guardian-3.1-8b"
# Weights are requested in bfloat16 with device placement left to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto", torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length in tokens; handed to parse_output together with the generation result.
input_len = input_ids.shape[1]

model.eval()
# Only the generate call belongs to the no_grad block, so it must be indented under it.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives Mekas's birth date as the screening date, contradicting the context above.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length in tokens; handed to parse_output together with the generation result.
input_len = input_ids.shape[1]

model.eval()
# Only the generate call belongs to the no_grad block, so it must be indented under it.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes `video_id`, which `tools` does not declare (it declares `aweme_id`);
# presumably this mismatch is what the example is meant to flag.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]

messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length in tokens; handed to parse_output together with the generation result.
input_len = input_ids.shape[1]

model.eval()
# Only the generate call belongs to the no_grad block, so it must be indented under it.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Answer strings the decoded model output is compared against (case-insensitively) in parse_output.
safe_token = "No"
unsafe_token = "Yes"
# Number of top-scoring candidates kept per generation step (the `k` passed to torch.topk).
nlogprobs = 20
def parse_output(output, input_len):
    """Extract the risk label and risk probability from a `generate()` result.

    Reads the module-level `tokenizer`, `nlogprobs`, `safe_token` and
    `unsafe_token`.

    Args:
        output: Generation result exposing `.scores` and `.sequences`.
        input_len: Prompt length in tokens; only tokens after this position
            in `output.sequences` are decoded.

    Returns:
        `(label, prob_of_risk)`. `label` is `unsafe_token`, `safe_token`, or
        "Failed" when the decoded text matches neither. `prob_of_risk` is a
        float, or None when `nlogprobs` is not positive.
    """
    label, prob_of_risk = None, None

    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last.
        list_index_logprobs_i = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        prob = get_probabilities(list_index_logprobs_i)
        # Convert here so the None case below never reaches `.item()`.
        prob_of_risk = prob[1].item()

    res = tokenizer.decode(output.sequences[:, input_len:][0], skip_special_tokens=True).strip()
    if unsafe_token.lower() == res.lower():
        label = unsafe_token
    elif safe_token.lower() == res.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Collapse per-step top-k candidates into a [safe, unsafe] probability pair.

    Reads the module-level `tokenizer`, `safe_token` and `unsafe_token`.

    Args:
        logprobs: Iterable of top-k results; each item exposes `.values` and
            `.indices`, of which only row 0 is read. The values are
            exponentiated, i.e. treated as log-probabilities.

    Returns:
        A 1-D tensor of two entries summing to 1: index 0 for the safe token,
        index 1 for the unsafe token.
    """
    # Tiny floor so math.log below never receives zero when a token is absent.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_token_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_token_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_token.lower():
                safe_mass += math.exp(score)
            if candidate == unsafe_token.lower():
                unsafe_mass += math.exp(score)

    # Softmax over the logs of the two masses normalises them to sum to 1.
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint identifier shared by the model and tokenizer loaders below.
model_path = "ibm-granite/granite-guardian-3.1-8b"
# Weights are requested in bfloat16 with device placement left to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto", torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length in tokens; handed to parse_output together with the generation result.
input_len = input_ids.shape[1]

model.eval()
# Only the generate call belongs to the no_grad block, so it must be indented under it.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives Mekas's birth date as the screening date, contradicting the context above.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length in tokens; handed to parse_output together with the generation result.
input_len = input_ids.shape[1]

model.eval()
# Only the generate call belongs to the no_grad block, so it must be indented under it.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes `video_id`, which `tools` does not declare (it declares `aweme_id`);
# presumably this mismatch is what the example is meant to flag.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]

messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length in tokens; handed to parse_output together with the generation result.
input_len = input_ids.shape[1]

model.eval()
# Only the generate call belongs to the no_grad block, so it must be indented under it.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Answer strings the decoded model output is compared against (case-insensitively) in parse_output.
safe_token = "No"
unsafe_token = "Yes"
# Number of top-scoring candidates kept per generation step (the `k` passed to torch.topk).
nlogprobs = 20
def parse_output(output, input_len):
    """Extract the risk label and risk probability from a `generate()` result.

    Reads the module-level `tokenizer`, `nlogprobs`, `safe_token` and
    `unsafe_token`.

    Args:
        output: Generation result exposing `.scores` and `.sequences`.
        input_len: Prompt length in tokens; only tokens after this position
            in `output.sequences` are decoded.

    Returns:
        `(label, prob_of_risk)`. `label` is `unsafe_token`, `safe_token`, or
        "Failed" when the decoded text matches neither. `prob_of_risk` is a
        float, or None when `nlogprobs` is not positive.
    """
    label, prob_of_risk = None, None

    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last.
        list_index_logprobs_i = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        prob = get_probabilities(list_index_logprobs_i)
        # Convert here so the None case below never reaches `.item()`.
        prob_of_risk = prob[1].item()

    res = tokenizer.decode(output.sequences[:, input_len:][0], skip_special_tokens=True).strip()
    if unsafe_token.lower() == res.lower():
        label = unsafe_token
    elif safe_token.lower() == res.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Collapse per-step top-k candidates into a [safe, unsafe] probability pair.

    Reads the module-level `tokenizer`, `safe_token` and `unsafe_token`.

    Args:
        logprobs: Iterable of top-k results; each item exposes `.values` and
            `.indices`, of which only row 0 is read. The values are
            exponentiated, i.e. treated as log-probabilities.

    Returns:
        A 1-D tensor of two entries summing to 1: index 0 for the safe token,
        index 1 for the unsafe token.
    """
    # Tiny floor so math.log below never receives zero when a token is absent.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_token_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_token_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_token.lower():
                safe_mass += math.exp(score)
            if candidate == unsafe_token.lower():
                unsafe_mass += math.exp(score)

    # Softmax over the logs of the two masses normalises them to sum to 1.
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint identifier shared by the model and tokenizer loaders below.
model_path = "ibm-granite/granite-guardian-3.1-8b"
# Weights are requested in bfloat16 with device placement left to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto", torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length in tokens; handed to parse_output together with the generation result.
input_len = input_ids.shape[1]

model.eval()
# Only the generate call belongs to the no_grad block, so it must be indented under it.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives Mekas's birth date as the screening date, contradicting the context above.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length in tokens; handed to parse_output together with the generation result.
input_len = input_ids.shape[1]

model.eval()
# Only the generate call belongs to the no_grad block, so it must be indented under it.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes `video_id`, which `tools` does not declare (it declares `aweme_id`);
# presumably this mismatch is what the example is meant to flag.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]

messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length in tokens; handed to parse_output together with the generation result.
input_len = input_ids.shape[1]

model.eval()
# Only the generate call belongs to the no_grad block, so it must be indented under it.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Answer strings the decoded model output is compared against (case-insensitively) in parse_output.
safe_token = "No"
unsafe_token = "Yes"
# Number of top-scoring candidates kept per generation step (the `k` passed to torch.topk).
nlogprobs = 20
def parse_output(output, input_len):
    """Extract the risk label and risk probability from a `generate()` result.

    Reads the module-level `tokenizer`, `nlogprobs`, `safe_token` and
    `unsafe_token`.

    Args:
        output: Generation result exposing `.scores` and `.sequences`.
        input_len: Prompt length in tokens; only tokens after this position
            in `output.sequences` are decoded.

    Returns:
        `(label, prob_of_risk)`. `label` is `unsafe_token`, `safe_token`, or
        "Failed" when the decoded text matches neither. `prob_of_risk` is a
        float, or None when `nlogprobs` is not positive.
    """
    label, prob_of_risk = None, None

    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last.
        list_index_logprobs_i = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        prob = get_probabilities(list_index_logprobs_i)
        # Convert here so the None case below never reaches `.item()`.
        prob_of_risk = prob[1].item()

    res = tokenizer.decode(output.sequences[:, input_len:][0], skip_special_tokens=True).strip()
    if unsafe_token.lower() == res.lower():
        label = unsafe_token
    elif safe_token.lower() == res.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Collapse per-step top-k candidates into a [safe, unsafe] probability pair.

    Reads the module-level `tokenizer`, `safe_token` and `unsafe_token`.

    Args:
        logprobs: Iterable of top-k results; each item exposes `.values` and
            `.indices`, of which only row 0 is read. The values are
            exponentiated, i.e. treated as log-probabilities.

    Returns:
        A 1-D tensor of two entries summing to 1: index 0 for the safe token,
        index 1 for the unsafe token.
    """
    # Tiny floor so math.log below never receives zero when a token is absent.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_token_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_token_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_token.lower():
                safe_mass += math.exp(score)
            if candidate == unsafe_token.lower():
                unsafe_mass += math.exp(score)

    # Softmax over the logs of the two masses normalises them to sum to 1.
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint identifier shared by the model and tokenizer loaders below.
model_path = "ibm-granite/granite-guardian-3.1-8b"
# Weights are requested in bfloat16 with device placement left to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto", torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length in tokens; handed to parse_output together with the generation result.
input_len = input_ids.shape[1]

model.eval()
# Only the generate call belongs to the no_grad block, so it must be indented under it.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives Mekas's birth date as the screening date, contradicting the context above.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length in tokens; handed to parse_output together with the generation result.
input_len = input_ids.shape[1]

model.eval()
# Only the generate call belongs to the no_grad block, so it must be indented under it.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes `video_id`, which `tools` does not declare (it declares `aweme_id`);
# presumably this mismatch is what the example is meant to flag.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]

messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length in tokens; handed to parse_output together with the generation result.
input_len = input_ids.shape[1]

model.eval()
# Only the generate call belongs to the no_grad block, so it must be indented under it.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Answer strings the decoded model output is compared against (case-insensitively) in parse_output.
safe_token = "No"
unsafe_token = "Yes"
# Number of top-scoring candidates kept per generation step (the `k` passed to torch.topk).
nlogprobs = 20
def parse_output(output, input_len):
    """Extract the risk label and risk probability from a `generate()` result.

    Reads the module-level `tokenizer`, `nlogprobs`, `safe_token` and
    `unsafe_token`.

    Args:
        output: Generation result exposing `.scores` and `.sequences`.
        input_len: Prompt length in tokens; only tokens after this position
            in `output.sequences` are decoded.

    Returns:
        `(label, prob_of_risk)`. `label` is `unsafe_token`, `safe_token`, or
        "Failed" when the decoded text matches neither. `prob_of_risk` is a
        float, or None when `nlogprobs` is not positive.
    """
    label, prob_of_risk = None, None

    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last.
        list_index_logprobs_i = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        prob = get_probabilities(list_index_logprobs_i)
        # Convert here so the None case below never reaches `.item()`.
        prob_of_risk = prob[1].item()

    res = tokenizer.decode(output.sequences[:, input_len:][0], skip_special_tokens=True).strip()
    if unsafe_token.lower() == res.lower():
        label = unsafe_token
    elif safe_token.lower() == res.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Collapse per-step top-k candidates into a [safe, unsafe] probability pair.

    Reads the module-level `tokenizer`, `safe_token` and `unsafe_token`.

    Args:
        logprobs: Iterable of top-k results; each item exposes `.values` and
            `.indices`, of which only row 0 is read. The values are
            exponentiated, i.e. treated as log-probabilities.

    Returns:
        A 1-D tensor of two entries summing to 1: index 0 for the safe token,
        index 1 for the unsafe token.
    """
    # Tiny floor so math.log below never receives zero when a token is absent.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_token_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_token_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_token.lower():
                safe_mass += math.exp(score)
            if candidate == unsafe_token.lower():
                unsafe_mass += math.exp(score)

    # Softmax over the logs of the two masses normalises them to sum to 1.
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint identifier shared by the model and tokenizer loaders below.
model_path = "ibm-granite/granite-guardian-3.1-8b"
# Weights are requested in bfloat16 with device placement left to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto", torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length in tokens; handed to parse_output together with the generation result.
input_len = input_ids.shape[1]

model.eval()
# Only the generate call belongs to the no_grad block, so it must be indented under it.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives Mekas's birth date as the screening date, contradicting the context above.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length in tokens; handed to parse_output together with the generation result.
input_len = input_ids.shape[1]

model.eval()
# Only the generate call belongs to the no_grad block, so it must be indented under it.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes `video_id`, which `tools` does not declare (it declares `aweme_id`);
# presumably this mismatch is what the example is meant to flag.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]

messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length in tokens; handed to parse_output together with the generation result.
input_len = input_ids.shape[1]

model.eval()
# Only the generate call belongs to the no_grad block, so it must be indented under it.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Answer strings the decoded model output is compared against (case-insensitively) in parse_output.
safe_token = "No"
unsafe_token = "Yes"
# Number of top-scoring candidates kept per generation step (the `k` passed to torch.topk).
nlogprobs = 20
def parse_output(output, input_len):
    """Extract the risk label and risk probability from a `generate()` result.

    Reads the module-level `tokenizer`, `nlogprobs`, `safe_token` and
    `unsafe_token`.

    Args:
        output: Generation result exposing `.scores` and `.sequences`.
        input_len: Prompt length in tokens; only tokens after this position
            in `output.sequences` are decoded.

    Returns:
        `(label, prob_of_risk)`. `label` is `unsafe_token`, `safe_token`, or
        "Failed" when the decoded text matches neither. `prob_of_risk` is a
        float, or None when `nlogprobs` is not positive.
    """
    label, prob_of_risk = None, None

    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last.
        list_index_logprobs_i = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        prob = get_probabilities(list_index_logprobs_i)
        # Convert here so the None case below never reaches `.item()`.
        prob_of_risk = prob[1].item()

    res = tokenizer.decode(output.sequences[:, input_len:][0], skip_special_tokens=True).strip()
    if unsafe_token.lower() == res.lower():
        label = unsafe_token
    elif safe_token.lower() == res.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate the safe/unsafe probability pair from per-step top-k results.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generation
            step; only row 0 of ``values`` and ``indices`` is read.

    Returns:
        A 2-element tensor ``[p_safe, p_unsafe]`` produced by a softmax, so
        the two entries sum to 1.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny non-zero floor so math.log below never receives 0.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for value, token_id in zip(step_values, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            # NOTE(review): values are exponentiated as log-probabilities;
            # confirm the scores passed in are normalized that way.
            if token_text == safe_key:
                safe_mass += math.exp(value)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(value)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint identifier shared by the model and tokenizer loads below.
model_path = "ibm-granite/granite-guardian-3.1-8b"
# Load the weights in bfloat16 with device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="auto", torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
# Conversation under review: the user prompt followed by the assistant reply.
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Note: `harm` is the default risk definition; it is applied when no config is specified.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Length of the prompt; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    # Greedy decoding; scores are kept so parse_output can derive a probability.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives Mekas's birth date where the context gives the screening date.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Length of the prompt; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    # Greedy decoding; scores are kept so parse_output can derive a probability.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes `video_id`, which is not among the parameters declared for
# `comment_list` above (`aweme_id`, `cursor`, `count`).
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Length of the prompt; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    # Greedy decoding; scores are kept so parse_output can derive a probability.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Answer strings the decoded model output is compared against in parse_output.
safe_token = "No"
unsafe_token = "Yes"
# k passed to torch.topk for each generation step; parse_output skips the
# probability computation when this is not positive.
nlogprobs = 20
def parse_output(output, input_len):
    """Convert a generation result into a risk label and a risk probability.

    Args:
        output: Generation result exposing ``scores`` and ``sequences``.
        input_len: Number of prompt tokens; tokens after this offset in
            ``output.sequences`` are decoded as the model's answer.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token`` or
        ``safe_token`` when the decoded answer matches one of them
        case-insensitively, otherwise ``"Failed"``. ``prob_of_risk`` is a
        float, or ``None`` when ``nlogprobs`` is not positive and no
        probability was computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        # NOTE(review): the dropped step is presumably the end-of-sequence
        # token -- confirm against the model's output format.
        top_k_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 is the unsafe-token entry of get_probabilities' result.
        prob_of_risk = get_probabilities(top_k_per_step)[1].item()

    # Decode only the newly generated tokens of the first sequence.
    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip().lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate the safe/unsafe probability pair from per-step top-k results.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generation
            step; only row 0 of ``values`` and ``indices`` is read.

    Returns:
        A 2-element tensor ``[p_safe, p_unsafe]`` produced by a softmax, so
        the two entries sum to 1.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny non-zero floor so math.log below never receives 0.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for value, token_id in zip(step_values, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            # NOTE(review): values are exponentiated as log-probabilities;
            # confirm the scores passed in are normalized that way.
            if token_text == safe_key:
                safe_mass += math.exp(value)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(value)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint identifier shared by the model and tokenizer loads below.
model_path = "ibm-granite/granite-guardian-3.1-8b"
# Load the weights in bfloat16 with device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="auto", torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
# Conversation under review: the user prompt followed by the assistant reply.
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Note: `harm` is the default risk definition; it is applied when no config is specified.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Length of the prompt; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    # Greedy decoding; scores are kept so parse_output can derive a probability.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives Mekas's birth date where the context gives the screening date.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Length of the prompt; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    # Greedy decoding; scores are kept so parse_output can derive a probability.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes `video_id`, which is not among the parameters declared for
# `comment_list` above (`aweme_id`, `cursor`, `count`).
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Length of the prompt; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    # Greedy decoding; scores are kept so parse_output can derive a probability.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Answer strings the decoded model output is compared against in parse_output.
safe_token = "No"
unsafe_token = "Yes"
# k passed to torch.topk for each generation step; parse_output skips the
# probability computation when this is not positive.
nlogprobs = 20
def parse_output(output, input_len):
    """Convert a generation result into a risk label and a risk probability.

    Args:
        output: Generation result exposing ``scores`` and ``sequences``.
        input_len: Number of prompt tokens; tokens after this offset in
            ``output.sequences`` are decoded as the model's answer.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token`` or
        ``safe_token`` when the decoded answer matches one of them
        case-insensitively, otherwise ``"Failed"``. ``prob_of_risk`` is a
        float, or ``None`` when ``nlogprobs`` is not positive and no
        probability was computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        # NOTE(review): the dropped step is presumably the end-of-sequence
        # token -- confirm against the model's output format.
        top_k_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 is the unsafe-token entry of get_probabilities' result.
        prob_of_risk = get_probabilities(top_k_per_step)[1].item()

    # Decode only the newly generated tokens of the first sequence.
    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip().lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate the safe/unsafe probability pair from per-step top-k results.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generation
            step; only row 0 of ``values`` and ``indices`` is read.

    Returns:
        A 2-element tensor ``[p_safe, p_unsafe]`` produced by a softmax, so
        the two entries sum to 1.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny non-zero floor so math.log below never receives 0.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for value, token_id in zip(step_values, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            # NOTE(review): values are exponentiated as log-probabilities;
            # confirm the scores passed in are normalized that way.
            if token_text == safe_key:
                safe_mass += math.exp(value)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(value)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint identifier shared by the model and tokenizer loads below.
model_path = "ibm-granite/granite-guardian-3.1-8b"
# Load the weights in bfloat16 with device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="auto", torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
# Conversation under review: the user prompt followed by the assistant reply.
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Note: `harm` is the default risk definition; it is applied when no config is specified.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Length of the prompt; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    # Greedy decoding; scores are kept so parse_output can derive a probability.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives Mekas's birth date where the context gives the screening date.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Length of the prompt; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    # Greedy decoding; scores are kept so parse_output can derive a probability.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes `video_id`, which is not among the parameters declared for
# `comment_list` above (`aweme_id`, `cursor`, `count`).
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Length of the prompt; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    # Greedy decoding; scores are kept so parse_output can derive a probability.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Answer strings the decoded model output is compared against in parse_output.
safe_token = "No"
unsafe_token = "Yes"
# k passed to torch.topk for each generation step; parse_output skips the
# probability computation when this is not positive.
nlogprobs = 20
def parse_output(output, input_len):
    """Convert a generation result into a risk label and a risk probability.

    Args:
        output: Generation result exposing ``scores`` and ``sequences``.
        input_len: Number of prompt tokens; tokens after this offset in
            ``output.sequences`` are decoded as the model's answer.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token`` or
        ``safe_token`` when the decoded answer matches one of them
        case-insensitively, otherwise ``"Failed"``. ``prob_of_risk`` is a
        float, or ``None`` when ``nlogprobs`` is not positive and no
        probability was computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        # NOTE(review): the dropped step is presumably the end-of-sequence
        # token -- confirm against the model's output format.
        top_k_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 is the unsafe-token entry of get_probabilities' result.
        prob_of_risk = get_probabilities(top_k_per_step)[1].item()

    # Decode only the newly generated tokens of the first sequence.
    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip().lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate the safe/unsafe probability pair from per-step top-k results.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generation
            step; only row 0 of ``values`` and ``indices`` is read.

    Returns:
        A 2-element tensor ``[p_safe, p_unsafe]`` produced by a softmax, so
        the two entries sum to 1.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny non-zero floor so math.log below never receives 0.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for value, token_id in zip(step_values, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            # NOTE(review): values are exponentiated as log-probabilities;
            # confirm the scores passed in are normalized that way.
            if token_text == safe_key:
                safe_mass += math.exp(value)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(value)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint identifier shared by the model and tokenizer loads below.
model_path = "ibm-granite/granite-guardian-3.1-8b"
# Load the weights in bfloat16 with device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="auto", torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
# Conversation under review: the user prompt followed by the assistant reply.
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Note: `harm` is the default risk definition; it is applied when no config is specified.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Length of the prompt; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    # Greedy decoding; scores are kept so parse_output can derive a probability.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives Mekas's birth date where the context gives the screening date.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Length of the prompt; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    # Greedy decoding; scores are kept so parse_output can derive a probability.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes `video_id`, which is not among the parameters declared for
# `comment_list` above (`aweme_id`, `cursor`, `count`).
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Length of the prompt; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    # Greedy decoding; scores are kept so parse_output can derive a probability.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Answer strings the decoded model output is compared against in parse_output.
safe_token = "No"
unsafe_token = "Yes"
# k passed to torch.topk for each generation step; parse_output skips the
# probability computation when this is not positive.
nlogprobs = 20
def parse_output(output, input_len):
    """Convert a generation result into a risk label and a risk probability.

    Args:
        output: Generation result exposing ``scores`` and ``sequences``.
        input_len: Number of prompt tokens; tokens after this offset in
            ``output.sequences`` are decoded as the model's answer.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token`` or
        ``safe_token`` when the decoded answer matches one of them
        case-insensitively, otherwise ``"Failed"``. ``prob_of_risk`` is a
        float, or ``None`` when ``nlogprobs`` is not positive and no
        probability was computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        # NOTE(review): the dropped step is presumably the end-of-sequence
        # token -- confirm against the model's output format.
        top_k_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 is the unsafe-token entry of get_probabilities' result.
        prob_of_risk = get_probabilities(top_k_per_step)[1].item()

    # Decode only the newly generated tokens of the first sequence.
    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip().lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate the safe/unsafe probability pair from per-step top-k results.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generation
            step; only row 0 of ``values`` and ``indices`` is read.

    Returns:
        A 2-element tensor ``[p_safe, p_unsafe]`` produced by a softmax, so
        the two entries sum to 1.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny non-zero floor so math.log below never receives 0.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for value, token_id in zip(step_values, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            # NOTE(review): values are exponentiated as log-probabilities;
            # confirm the scores passed in are normalized that way.
            if token_text == safe_key:
                safe_mass += math.exp(value)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(value)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint identifier shared by the model and tokenizer loads below.
model_path = "ibm-granite/granite-guardian-3.1-8b"
# Load the weights in bfloat16 with device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="auto", torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
# Conversation under review: the user prompt followed by the assistant reply.
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Note: `harm` is the default risk definition; it is applied when no config is specified.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Length of the prompt; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    # Greedy decoding; scores are kept so parse_output can derive a probability.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives Mekas's birth date where the context gives the screening date.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Length of the prompt; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    # Greedy decoding; scores are kept so parse_output can derive a probability.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes `video_id`, which is not among the parameters declared for
# `comment_list` above (`aweme_id`, `cursor`, `count`).
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Length of the prompt; parse_output uses it to slice off the generated tokens.
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    # Greedy decoding; scores are kept so parse_output can derive a probability.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Answer strings the decoded model output is compared against in parse_output.
safe_token = "No"
unsafe_token = "Yes"
# k passed to torch.topk for each generation step; parse_output skips the
# probability computation when this is not positive.
nlogprobs = 20
def parse_output(output, input_len):
    """Convert a generation result into a risk label and a risk probability.

    Args:
        output: Generation result exposing ``scores`` and ``sequences``.
        input_len: Number of prompt tokens; tokens after this offset in
            ``output.sequences`` are decoded as the model's answer.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token`` or
        ``safe_token`` when the decoded answer matches one of them
        case-insensitively, otherwise ``"Failed"``. ``prob_of_risk`` is a
        float, or ``None`` when ``nlogprobs`` is not positive and no
        probability was computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        # NOTE(review): the dropped step is presumably the end-of-sequence
        # token -- confirm against the model's output format.
        top_k_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 is the unsafe-token entry of get_probabilities' result.
        prob_of_risk = get_probabilities(top_k_per_step)[1].item()

    # Decode only the newly generated tokens of the first sequence.
    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip().lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate the safe/unsafe split from per-step top-k candidates.

    Args:
        logprobs: Iterable of objects exposing ``values`` and ``indices``
            (parse_output passes ``torch.topk`` results). Only the first
            row of each is read.

    Returns:
        A 2-element tensor ``[safe, unsafe]`` produced by a softmax over the
        logs of the two accumulated totals.
    """
    # Tiny positive seeds keep math.log defined when a token never shows up.
    safe_total = 1e-50
    unsafe_total = 1e-50

    for step in logprobs:
        scores_row = step.values.tolist()[0]
        ids_row = step.indices.tolist()[0]
        for score, token_id in zip(scores_row, ids_row):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_token.lower():
                safe_total += math.exp(score)
            if candidate == unsafe_token.lower():
                unsafe_total += math.exp(score)

    log_totals = torch.tensor([math.log(safe_total), math.log(unsafe_total)])
    return torch.softmax(log_totals, dim=0)
# Load the model and its tokenizer from the same checkpoint path.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto", torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

# Conversation under review: the user prompt followed by the assistant reply.
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}

input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output decodes only what comes after it.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

# The reference passage goes in under the "context" role, followed by the reply under review.
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}

input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output decodes only what comes after it.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes `video_id`, while the schema above declares
# `aweme_id` -- presumably the mismatch this example is meant to flag.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    }
]

messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}

input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output decodes only what comes after it.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Generated answer that parse_output reports as the safe label.
safe_token = "No"
# Generated answer that parse_output reports as the unsafe (risk detected) label.
unsafe_token = "Yes"
# k passed to torch.topk for each generation step; parse_output skips the
# probability estimate when this is not positive.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a risk label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` fields are read.
        input_len: Number of prompt tokens. Everything after this offset in
            ``output.sequences`` is decoded as the generated answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded answer matches
        neither (comparison is case-insensitive). ``prob_of_risk`` is a
        float, or ``None`` when ``nlogprobs`` is not positive and no
        probability was computed.
    """
    label, prob_of_risk = None, None

    if nlogprobs > 0:
        # Top-k candidates for every entry of output.scores except the last.
        list_index_logprobs_i = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        # Index 1 of the returned pair is the unsafe-token share.
        prob_of_risk = get_probabilities(list_index_logprobs_i)[1]

    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    if unsafe_token.lower() == res.lower():
        label = unsafe_token
    elif safe_token.lower() == res.lower():
        label = safe_token
    else:
        label = "Failed"

    # Calling .item() unconditionally raised AttributeError on None whenever
    # nlogprobs <= 0; report a missing probability as None instead.
    return label, (prob_of_risk.item() if prob_of_risk is not None else None)
def get_probabilities(logprobs):
    """Estimate the safe/unsafe split from per-step top-k candidates.

    Args:
        logprobs: Iterable of objects exposing ``values`` and ``indices``
            (parse_output passes ``torch.topk`` results). Only the first
            row of each is read.

    Returns:
        A 2-element tensor ``[safe, unsafe]`` produced by a softmax over the
        logs of the two accumulated totals.
    """
    # Tiny positive seeds keep math.log defined when a token never shows up.
    safe_total = 1e-50
    unsafe_total = 1e-50

    for step in logprobs:
        scores_row = step.values.tolist()[0]
        ids_row = step.indices.tolist()[0]
        for score, token_id in zip(scores_row, ids_row):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_token.lower():
                safe_total += math.exp(score)
            if candidate == unsafe_token.lower():
                unsafe_total += math.exp(score)

    log_totals = torch.tensor([math.log(safe_total), math.log(unsafe_total)])
    return torch.softmax(log_totals, dim=0)
# Load the model and its tokenizer from the same checkpoint path.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto", torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

# Conversation under review: the user prompt followed by the assistant reply.
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}

input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output decodes only what comes after it.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

# The reference passage goes in under the "context" role, followed by the reply under review.
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}

input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output decodes only what comes after it.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes `video_id`, while the schema above declares
# `aweme_id` -- presumably the mismatch this example is meant to flag.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    }
]

messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}

input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output decodes only what comes after it.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Generated answer that parse_output reports as the safe label.
safe_token = "No"
# Generated answer that parse_output reports as the unsafe (risk detected) label.
unsafe_token = "Yes"
# k passed to torch.topk for each generation step; parse_output skips the
# probability estimate when this is not positive.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a risk label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` fields are read.
        input_len: Number of prompt tokens. Everything after this offset in
            ``output.sequences`` is decoded as the generated answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded answer matches
        neither (comparison is case-insensitive). ``prob_of_risk`` is a
        float, or ``None`` when ``nlogprobs`` is not positive and no
        probability was computed.
    """
    label, prob_of_risk = None, None

    if nlogprobs > 0:
        # Top-k candidates for every entry of output.scores except the last.
        list_index_logprobs_i = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        # Index 1 of the returned pair is the unsafe-token share.
        prob_of_risk = get_probabilities(list_index_logprobs_i)[1]

    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    if unsafe_token.lower() == res.lower():
        label = unsafe_token
    elif safe_token.lower() == res.lower():
        label = safe_token
    else:
        label = "Failed"

    # Calling .item() unconditionally raised AttributeError on None whenever
    # nlogprobs <= 0; report a missing probability as None instead.
    return label, (prob_of_risk.item() if prob_of_risk is not None else None)
def get_probabilities(logprobs):
    """Estimate the safe/unsafe split from per-step top-k candidates.

    Args:
        logprobs: Iterable of objects exposing ``values`` and ``indices``
            (parse_output passes ``torch.topk`` results). Only the first
            row of each is read.

    Returns:
        A 2-element tensor ``[safe, unsafe]`` produced by a softmax over the
        logs of the two accumulated totals.
    """
    # Tiny positive seeds keep math.log defined when a token never shows up.
    safe_total = 1e-50
    unsafe_total = 1e-50

    for step in logprobs:
        scores_row = step.values.tolist()[0]
        ids_row = step.indices.tolist()[0]
        for score, token_id in zip(scores_row, ids_row):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_token.lower():
                safe_total += math.exp(score)
            if candidate == unsafe_token.lower():
                unsafe_total += math.exp(score)

    log_totals = torch.tensor([math.log(safe_total), math.log(unsafe_total)])
    return torch.softmax(log_totals, dim=0)
# Load the model and its tokenizer from the same checkpoint path.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto", torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

# Conversation under review: the user prompt followed by the assistant reply.
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}

input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output decodes only what comes after it.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

# The reference passage goes in under the "context" role, followed by the reply under review.
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}

input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output decodes only what comes after it.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes `video_id`, while the schema above declares
# `aweme_id` -- presumably the mismatch this example is meant to flag.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    }
]

messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}

input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output decodes only what comes after it.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Generated answer that parse_output reports as the safe label.
safe_token = "No"
# Generated answer that parse_output reports as the unsafe (risk detected) label.
unsafe_token = "Yes"
# k passed to torch.topk for each generation step; parse_output skips the
# probability estimate when this is not positive.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a risk label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` fields are read.
        input_len: Number of prompt tokens. Everything after this offset in
            ``output.sequences`` is decoded as the generated answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded answer matches
        neither (comparison is case-insensitive). ``prob_of_risk`` is a
        float, or ``None`` when ``nlogprobs`` is not positive and no
        probability was computed.
    """
    label, prob_of_risk = None, None

    if nlogprobs > 0:
        # Top-k candidates for every entry of output.scores except the last.
        list_index_logprobs_i = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        # Index 1 of the returned pair is the unsafe-token share.
        prob_of_risk = get_probabilities(list_index_logprobs_i)[1]

    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    if unsafe_token.lower() == res.lower():
        label = unsafe_token
    elif safe_token.lower() == res.lower():
        label = safe_token
    else:
        label = "Failed"

    # Calling .item() unconditionally raised AttributeError on None whenever
    # nlogprobs <= 0; report a missing probability as None instead.
    return label, (prob_of_risk.item() if prob_of_risk is not None else None)
def get_probabilities(logprobs):
    """Estimate the safe/unsafe split from per-step top-k candidates.

    Args:
        logprobs: Iterable of objects exposing ``values`` and ``indices``
            (parse_output passes ``torch.topk`` results). Only the first
            row of each is read.

    Returns:
        A 2-element tensor ``[safe, unsafe]`` produced by a softmax over the
        logs of the two accumulated totals.
    """
    # Tiny positive seeds keep math.log defined when a token never shows up.
    safe_total = 1e-50
    unsafe_total = 1e-50

    for step in logprobs:
        scores_row = step.values.tolist()[0]
        ids_row = step.indices.tolist()[0]
        for score, token_id in zip(scores_row, ids_row):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_token.lower():
                safe_total += math.exp(score)
            if candidate == unsafe_token.lower():
                unsafe_total += math.exp(score)

    log_totals = torch.tensor([math.log(safe_total), math.log(unsafe_total)])
    return torch.softmax(log_totals, dim=0)
# Load the model and its tokenizer from the same checkpoint path.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto", torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

# Conversation under review: the user prompt followed by the assistant reply.
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}

input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output decodes only what comes after it.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

# The reference passage goes in under the "context" role, followed by the reply under review.
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}

input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output decodes only what comes after it.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes `video_id`, while the schema above declares
# `aweme_id` -- presumably the mismatch this example is meant to flag.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    }
]

messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}

input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output decodes only what comes after it.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Generated answer that parse_output reports as the safe label.
safe_token = "No"
# Generated answer that parse_output reports as the unsafe (risk detected) label.
unsafe_token = "Yes"
# k passed to torch.topk for each generation step; parse_output skips the
# probability estimate when this is not positive.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a risk label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` fields are read.
        input_len: Number of prompt tokens. Everything after this offset in
            ``output.sequences`` is decoded as the generated answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded answer matches
        neither (comparison is case-insensitive). ``prob_of_risk`` is a
        float, or ``None`` when ``nlogprobs`` is not positive and no
        probability was computed.
    """
    label, prob_of_risk = None, None

    if nlogprobs > 0:
        # Top-k candidates for every entry of output.scores except the last.
        list_index_logprobs_i = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        # Index 1 of the returned pair is the unsafe-token share.
        prob_of_risk = get_probabilities(list_index_logprobs_i)[1]

    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    if unsafe_token.lower() == res.lower():
        label = unsafe_token
    elif safe_token.lower() == res.lower():
        label = safe_token
    else:
        label = "Failed"

    # Calling .item() unconditionally raised AttributeError on None whenever
    # nlogprobs <= 0; report a missing probability as None instead.
    return label, (prob_of_risk.item() if prob_of_risk is not None else None)
def get_probabilities(logprobs):
    """Estimate the safe/unsafe split from per-step top-k candidates.

    Args:
        logprobs: Iterable of objects exposing ``values`` and ``indices``
            (parse_output passes ``torch.topk`` results). Only the first
            row of each is read.

    Returns:
        A 2-element tensor ``[safe, unsafe]`` produced by a softmax over the
        logs of the two accumulated totals.
    """
    # Tiny positive seeds keep math.log defined when a token never shows up.
    safe_total = 1e-50
    unsafe_total = 1e-50

    for step in logprobs:
        scores_row = step.values.tolist()[0]
        ids_row = step.indices.tolist()[0]
        for score, token_id in zip(scores_row, ids_row):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_token.lower():
                safe_total += math.exp(score)
            if candidate == unsafe_token.lower():
                unsafe_total += math.exp(score)

    log_totals = torch.tensor([math.log(safe_total), math.log(unsafe_total)])
    return torch.softmax(log_totals, dim=0)
# Load the model and its tokenizer from the same checkpoint path.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto", torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

# Conversation under review: the user prompt followed by the assistant reply.
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}

input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output decodes only what comes after it.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

# The reference passage goes in under the "context" role, followed by the reply under review.
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}

input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output decodes only what comes after it.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes `video_id`, while the schema above declares
# `aweme_id` -- presumably the mismatch this example is meant to flag.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    }
]

messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}

input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output decodes only what comes after it.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Generated answer that parse_output reports as the safe label.
safe_token = "No"
# Generated answer that parse_output reports as the unsafe (risk detected) label.
unsafe_token = "Yes"
# k passed to torch.topk for each generation step; parse_output skips the
# probability estimate when this is not positive.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a risk label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` fields are read.
        input_len: Number of prompt tokens. Everything after this offset in
            ``output.sequences`` is decoded as the generated answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded answer matches
        neither (comparison is case-insensitive). ``prob_of_risk`` is a
        float, or ``None`` when ``nlogprobs`` is not positive and no
        probability was computed.
    """
    label, prob_of_risk = None, None

    if nlogprobs > 0:
        # Top-k candidates for every entry of output.scores except the last.
        list_index_logprobs_i = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        # Index 1 of the returned pair is the unsafe-token share.
        prob_of_risk = get_probabilities(list_index_logprobs_i)[1]

    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    if unsafe_token.lower() == res.lower():
        label = unsafe_token
    elif safe_token.lower() == res.lower():
        label = safe_token
    else:
        label = "Failed"

    # Calling .item() unconditionally raised AttributeError on None whenever
    # nlogprobs <= 0; report a missing probability as None instead.
    return label, (prob_of_risk.item() if prob_of_risk is not None else None)
def get_probabilities(logprobs):
    """Compute normalized safe/unsafe probabilities from top-k candidates.

    Args:
        logprobs: Iterable of per-step top-k results exposing `values` and
            `indices` (as returned by `torch.topk`); only the first row of
            each is read.

    Returns:
        1-D tensor `[p_safe, p_unsafe]` summing to 1.
    """
    safe_label = safe_token.lower()
    unsafe_label = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a token never shows up.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50
    for gen_token_i in logprobs:
        step_values = gen_token_i.values.tolist()[0]
        step_indices = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(step_values, step_indices):
            candidate = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if candidate == safe_label:
                safe_token_prob += math.exp(logprob)
            if candidate == unsafe_label:
                unsafe_token_prob += math.exp(logprob)

    # Softmax over the logs equals dividing each mass by their sum.
    return torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]), dim=0
    )
# Checkpoint id shared by the model and tokenizer loaders.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto", torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
# Inference only; sampling is disabled and per-step scores are returned for parse_output.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response's date differs from the screening date given in the context;
# the expected verdict below is "Yes".
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
# Inference only; sampling is disabled and per-step scores are returned for parse_output.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes `video_id` while the tool declares `aweme_id`;
# presumably the deliberate mismatch this example is meant to flag -- confirm.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
# Inference only; sampling is disabled and per-step scores are returned for parse_output.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings; compared case-insensitively with the decoded model output
# and with candidate tokens in get_probabilities.
safe_token = "No"
unsafe_token = "Yes"
# Number of top-scoring candidates kept per generation step (`k` for torch.topk).
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a `generate` result into a verdict label and a risk probability.

    Args:
        output: Result of `model.generate(...)`; its `scores` and
            `sequences` attributes are read.
        input_len: Prompt length; tokens after this offset in
            `output.sequences` are decoded as the verdict.

    Returns:
        Tuple `(label, prob_of_risk)`. `label` is `unsafe_token`,
        `safe_token`, or "Failed" when the decoded text matches neither.
        `prob_of_risk` is a float, or None when `nlogprobs <= 0`.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        # NOTE(review): the last step is presumably the end-of-sequence token -- confirm.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of the result is the unsafe-token probability.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    res = tokenizer.decode(output.sequences[:, input_len:][0], skip_special_tokens=True).strip()
    verdict = res.lower()
    if verdict == unsafe_token.lower():
        label = unsafe_token
    elif verdict == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    # `.item()` is applied only when a probability was computed; calling it
    # unconditionally raised AttributeError on None.
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Compute normalized safe/unsafe probabilities from top-k candidates.

    Args:
        logprobs: Iterable of per-step top-k results exposing `values` and
            `indices` (as returned by `torch.topk`); only the first row of
            each is read.

    Returns:
        1-D tensor `[p_safe, p_unsafe]` summing to 1.
    """
    safe_label = safe_token.lower()
    unsafe_label = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a token never shows up.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50
    for gen_token_i in logprobs:
        step_values = gen_token_i.values.tolist()[0]
        step_indices = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(step_values, step_indices):
            candidate = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if candidate == safe_label:
                safe_token_prob += math.exp(logprob)
            if candidate == unsafe_label:
                unsafe_token_prob += math.exp(logprob)

    # Softmax over the logs equals dividing each mass by their sum.
    return torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]), dim=0
    )
# Checkpoint id shared by the model and tokenizer loaders.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto", torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
# Inference only; sampling is disabled and per-step scores are returned for parse_output.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response's date differs from the screening date given in the context;
# the expected verdict below is "Yes".
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
# Inference only; sampling is disabled and per-step scores are returned for parse_output.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes `video_id` while the tool declares `aweme_id`;
# presumably the deliberate mismatch this example is meant to flag -- confirm.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
# Inference only; sampling is disabled and per-step scores are returned for parse_output.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings; compared case-insensitively with the decoded model output
# and with candidate tokens in get_probabilities.
safe_token = "No"
unsafe_token = "Yes"
# Number of top-scoring candidates kept per generation step (`k` for torch.topk).
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a `generate` result into a verdict label and a risk probability.

    Args:
        output: Result of `model.generate(...)`; its `scores` and
            `sequences` attributes are read.
        input_len: Prompt length; tokens after this offset in
            `output.sequences` are decoded as the verdict.

    Returns:
        Tuple `(label, prob_of_risk)`. `label` is `unsafe_token`,
        `safe_token`, or "Failed" when the decoded text matches neither.
        `prob_of_risk` is a float, or None when `nlogprobs <= 0`.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        # NOTE(review): the last step is presumably the end-of-sequence token -- confirm.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of the result is the unsafe-token probability.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    res = tokenizer.decode(output.sequences[:, input_len:][0], skip_special_tokens=True).strip()
    verdict = res.lower()
    if verdict == unsafe_token.lower():
        label = unsafe_token
    elif verdict == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    # `.item()` is applied only when a probability was computed; calling it
    # unconditionally raised AttributeError on None.
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Compute normalized safe/unsafe probabilities from top-k candidates.

    Args:
        logprobs: Iterable of per-step top-k results exposing `values` and
            `indices` (as returned by `torch.topk`); only the first row of
            each is read.

    Returns:
        1-D tensor `[p_safe, p_unsafe]` summing to 1.
    """
    safe_label = safe_token.lower()
    unsafe_label = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a token never shows up.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50
    for gen_token_i in logprobs:
        step_values = gen_token_i.values.tolist()[0]
        step_indices = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(step_values, step_indices):
            candidate = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if candidate == safe_label:
                safe_token_prob += math.exp(logprob)
            if candidate == unsafe_label:
                unsafe_token_prob += math.exp(logprob)

    # Softmax over the logs equals dividing each mass by their sum.
    return torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]), dim=0
    )
# Checkpoint id shared by the model and tokenizer loaders.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto", torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
# Inference only; sampling is disabled and per-step scores are returned for parse_output.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response's date differs from the screening date given in the context;
# the expected verdict below is "Yes".
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
# Inference only; sampling is disabled and per-step scores are returned for parse_output.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes `video_id` while the tool declares `aweme_id`;
# presumably the deliberate mismatch this example is meant to flag -- confirm.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
# Inference only; sampling is disabled and per-step scores are returned for parse_output.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings; compared case-insensitively with the decoded model output
# and with candidate tokens in get_probabilities.
safe_token = "No"
unsafe_token = "Yes"
# Number of top-scoring candidates kept per generation step (`k` for torch.topk).
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a `generate` result into a verdict label and a risk probability.

    Args:
        output: Result of `model.generate(...)`; its `scores` and
            `sequences` attributes are read.
        input_len: Prompt length; tokens after this offset in
            `output.sequences` are decoded as the verdict.

    Returns:
        Tuple `(label, prob_of_risk)`. `label` is `unsafe_token`,
        `safe_token`, or "Failed" when the decoded text matches neither.
        `prob_of_risk` is a float, or None when `nlogprobs <= 0`.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        # NOTE(review): the last step is presumably the end-of-sequence token -- confirm.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of the result is the unsafe-token probability.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    res = tokenizer.decode(output.sequences[:, input_len:][0], skip_special_tokens=True).strip()
    verdict = res.lower()
    if verdict == unsafe_token.lower():
        label = unsafe_token
    elif verdict == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    # `.item()` is applied only when a probability was computed; calling it
    # unconditionally raised AttributeError on None.
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Compute normalized safe/unsafe probabilities from top-k candidates.

    Args:
        logprobs: Iterable of per-step top-k results exposing `values` and
            `indices` (as returned by `torch.topk`); only the first row of
            each is read.

    Returns:
        1-D tensor `[p_safe, p_unsafe]` summing to 1.
    """
    safe_label = safe_token.lower()
    unsafe_label = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a token never shows up.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50
    for gen_token_i in logprobs:
        step_values = gen_token_i.values.tolist()[0]
        step_indices = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(step_values, step_indices):
            candidate = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if candidate == safe_label:
                safe_token_prob += math.exp(logprob)
            if candidate == unsafe_label:
                unsafe_token_prob += math.exp(logprob)

    # Softmax over the logs equals dividing each mass by their sum.
    return torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]), dim=0
    )
# Checkpoint id shared by the model and tokenizer loaders.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto", torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
# Inference only; sampling is disabled and per-step scores are returned for parse_output.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response's date differs from the screening date given in the context;
# the expected verdict below is "Yes".
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
# Inference only; sampling is disabled and per-step scores are returned for parse_output.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes `video_id` while the tool declares `aweme_id`;
# presumably the deliberate mismatch this example is meant to flag -- confirm.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
# Inference only; sampling is disabled and per-step scores are returned for parse_output.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings; compared case-insensitively with the decoded model output
# and with candidate tokens in get_probabilities.
safe_token = "No"
unsafe_token = "Yes"
# Number of top-scoring candidates kept per generation step (`k` for torch.topk).
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a `generate` result into a verdict label and a risk probability.

    Args:
        output: Result of `model.generate(...)`; its `scores` and
            `sequences` attributes are read.
        input_len: Prompt length; tokens after this offset in
            `output.sequences` are decoded as the verdict.

    Returns:
        Tuple `(label, prob_of_risk)`. `label` is `unsafe_token`,
        `safe_token`, or "Failed" when the decoded text matches neither.
        `prob_of_risk` is a float, or None when `nlogprobs <= 0`.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        # NOTE(review): the last step is presumably the end-of-sequence token -- confirm.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of the result is the unsafe-token probability.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    res = tokenizer.decode(output.sequences[:, input_len:][0], skip_special_tokens=True).strip()
    verdict = res.lower()
    if verdict == unsafe_token.lower():
        label = unsafe_token
    elif verdict == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    # `.item()` is applied only when a probability was computed; calling it
    # unconditionally raised AttributeError on None.
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Compute normalized safe/unsafe probabilities from top-k candidates.

    Args:
        logprobs: Iterable of per-step top-k results exposing `values` and
            `indices` (as returned by `torch.topk`); only the first row of
            each is read.

    Returns:
        1-D tensor `[p_safe, p_unsafe]` summing to 1.
    """
    safe_label = safe_token.lower()
    unsafe_label = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a token never shows up.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50
    for gen_token_i in logprobs:
        step_values = gen_token_i.values.tolist()[0]
        step_indices = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(step_values, step_indices):
            candidate = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if candidate == safe_label:
                safe_token_prob += math.exp(logprob)
            if candidate == unsafe_label:
                unsafe_token_prob += math.exp(logprob)

    # Softmax over the logs equals dividing each mass by their sum.
    return torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]), dim=0
    )
# Checkpoint id shared by the model and tokenizer loaders.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto", torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
# Inference only; sampling is disabled and per-step scores are returned for parse_output.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response's date differs from the screening date given in the context;
# the expected verdict below is "Yes".
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
# Inference only; sampling is disabled and per-step scores are returned for parse_output.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes `video_id` while the tool declares `aweme_id`;
# presumably the deliberate mismatch this example is meant to flag -- confirm.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
# Inference only; sampling is disabled and per-step scores are returned for parse_output.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings; compared case-insensitively with the decoded model output
# and with candidate tokens in get_probabilities.
safe_token = "No"
unsafe_token = "Yes"
# Number of top-scoring candidates kept per generation step (`k` for torch.topk).
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a `generate` result into a verdict label and a risk probability.

    Args:
        output: Result of `model.generate(...)`; its `scores` and
            `sequences` attributes are read.
        input_len: Prompt length; tokens after this offset in
            `output.sequences` are decoded as the verdict.

    Returns:
        Tuple `(label, prob_of_risk)`. `label` is `unsafe_token`,
        `safe_token`, or "Failed" when the decoded text matches neither.
        `prob_of_risk` is a float, or None when `nlogprobs <= 0`.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        # NOTE(review): the last step is presumably the end-of-sequence token -- confirm.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of the result is the unsafe-token probability.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    res = tokenizer.decode(output.sequences[:, input_len:][0], skip_special_tokens=True).strip()
    verdict = res.lower()
    if verdict == unsafe_token.lower():
        label = unsafe_token
    elif verdict == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    # `.item()` is applied only when a probability was computed; calling it
    # unconditionally raised AttributeError on None.
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate the (safe, unsafe) probability pair from top-k candidates.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generated
            position; row 0 of each result's ``values`` and ``indices`` is read.

    Returns:
        A 2-element tensor ``[p_safe, p_unsafe]`` obtained by a softmax over
        the logs of the two accumulated masses.
    """
    # Tiny non-zero seeds keep math.log defined when a token never shows up.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        scores = step.values.tolist()[0]
        token_ids = step.indices.tolist()[0]
        for score, token_id in zip(scores, token_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_token.lower():
                safe_mass += math.exp(score)
            if candidate == unsafe_token.lower():
                unsafe_mass += math.exp(score)

    log_pair = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_pair, dim=0)
# Load the tokenizer and the checkpoint (bfloat16 weights, automatic device mapping).
model_path = "ibm-granite/granite-guardian-3.1-8b"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="auto", torch_dtype=torch.bfloat16
)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}

# Render the conversation through the chat template and move the ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output decodes only what follows it

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives December 24, 1922 as the screening date; the context says July 16, 1964.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}

# Render the conversation through the chat template and move the ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output decodes only what follows it

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call below passes `video_id`, which is not one of the parameters declared in `tools`.
response_text = [
    {
        "name": "comment_list",
        "arguments": {"video_id": 456789123, "count": 15},
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}

# Render the conversation through the chat template and move the ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output decodes only what follows it

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings that parse_output compares (case-insensitively) with the decoded answer.
safe_token = "No"  # returned as the label when the answer equals this token
unsafe_token = "Yes"  # returned as the label when the answer equals this token
# Passed as `k` to torch.topk: number of top-scoring candidates kept per generated position.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            ``output.scores`` and ``output.sequences`` are read.
        input_len: Number of prompt tokens. Everything after this offset in
            ``output.sequences`` is decoded as the generated answer.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded answer matches
        neither. ``prob_of_risk`` is a Python float, or ``None`` when
        ``nlogprobs`` is not positive and no probability was computed
        (previously this case crashed on ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generated position except the last one.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of the (safe, unsafe) pair is the unsafe-token probability.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    normalized = answer.lower()
    if normalized == unsafe_token.lower():
        label = unsafe_token
    elif normalized == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate the (safe, unsafe) probability pair from top-k candidates.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generated
            position; row 0 of each result's ``values`` and ``indices`` is read.

    Returns:
        A 2-element tensor ``[p_safe, p_unsafe]`` obtained by a softmax over
        the logs of the two accumulated masses.
    """
    # Tiny non-zero seeds keep math.log defined when a token never shows up.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        scores = step.values.tolist()[0]
        token_ids = step.indices.tolist()[0]
        for score, token_id in zip(scores, token_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_token.lower():
                safe_mass += math.exp(score)
            if candidate == unsafe_token.lower():
                unsafe_mass += math.exp(score)

    log_pair = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_pair, dim=0)
# Load the tokenizer and the checkpoint (bfloat16 weights, automatic device mapping).
model_path = "ibm-granite/granite-guardian-3.1-8b"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="auto", torch_dtype=torch.bfloat16
)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}

# Render the conversation through the chat template and move the ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output decodes only what follows it

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives December 24, 1922 as the screening date; the context says July 16, 1964.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}

# Render the conversation through the chat template and move the ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output decodes only what follows it

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call below passes `video_id`, which is not one of the parameters declared in `tools`.
response_text = [
    {
        "name": "comment_list",
        "arguments": {"video_id": 456789123, "count": 15},
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}

# Render the conversation through the chat template and move the ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output decodes only what follows it

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings that parse_output compares (case-insensitively) with the decoded answer.
safe_token = "No"  # returned as the label when the answer equals this token
unsafe_token = "Yes"  # returned as the label when the answer equals this token
# Passed as `k` to torch.topk: number of top-scoring candidates kept per generated position.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            ``output.scores`` and ``output.sequences`` are read.
        input_len: Number of prompt tokens. Everything after this offset in
            ``output.sequences`` is decoded as the generated answer.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded answer matches
        neither. ``prob_of_risk`` is a Python float, or ``None`` when
        ``nlogprobs`` is not positive and no probability was computed
        (previously this case crashed on ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generated position except the last one.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of the (safe, unsafe) pair is the unsafe-token probability.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    normalized = answer.lower()
    if normalized == unsafe_token.lower():
        label = unsafe_token
    elif normalized == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate the (safe, unsafe) probability pair from top-k candidates.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generated
            position; row 0 of each result's ``values`` and ``indices`` is read.

    Returns:
        A 2-element tensor ``[p_safe, p_unsafe]`` obtained by a softmax over
        the logs of the two accumulated masses.
    """
    # Tiny non-zero seeds keep math.log defined when a token never shows up.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        scores = step.values.tolist()[0]
        token_ids = step.indices.tolist()[0]
        for score, token_id in zip(scores, token_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_token.lower():
                safe_mass += math.exp(score)
            if candidate == unsafe_token.lower():
                unsafe_mass += math.exp(score)

    log_pair = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_pair, dim=0)
# Load the tokenizer and the checkpoint (bfloat16 weights, automatic device mapping).
model_path = "ibm-granite/granite-guardian-3.1-8b"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="auto", torch_dtype=torch.bfloat16
)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}

# Render the conversation through the chat template and move the ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output decodes only what follows it

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives December 24, 1922 as the screening date; the context says July 16, 1964.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}

# Render the conversation through the chat template and move the ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output decodes only what follows it

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call below passes `video_id`, which is not one of the parameters declared in `tools`.
response_text = [
    {
        "name": "comment_list",
        "arguments": {"video_id": 456789123, "count": 15},
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}

# Render the conversation through the chat template and move the ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output decodes only what follows it

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings that parse_output compares (case-insensitively) with the decoded answer.
safe_token = "No"  # returned as the label when the answer equals this token
unsafe_token = "Yes"  # returned as the label when the answer equals this token
# Passed as `k` to torch.topk: number of top-scoring candidates kept per generated position.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            ``output.scores`` and ``output.sequences`` are read.
        input_len: Number of prompt tokens. Everything after this offset in
            ``output.sequences`` is decoded as the generated answer.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded answer matches
        neither. ``prob_of_risk`` is a Python float, or ``None`` when
        ``nlogprobs`` is not positive and no probability was computed
        (previously this case crashed on ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generated position except the last one.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of the (safe, unsafe) pair is the unsafe-token probability.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    normalized = answer.lower()
    if normalized == unsafe_token.lower():
        label = unsafe_token
    elif normalized == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate the (safe, unsafe) probability pair from top-k candidates.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generated
            position; row 0 of each result's ``values`` and ``indices`` is read.

    Returns:
        A 2-element tensor ``[p_safe, p_unsafe]`` obtained by a softmax over
        the logs of the two accumulated masses.
    """
    # Tiny non-zero seeds keep math.log defined when a token never shows up.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        scores = step.values.tolist()[0]
        token_ids = step.indices.tolist()[0]
        for score, token_id in zip(scores, token_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_token.lower():
                safe_mass += math.exp(score)
            if candidate == unsafe_token.lower():
                unsafe_mass += math.exp(score)

    log_pair = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_pair, dim=0)
# Load the tokenizer and the checkpoint (bfloat16 weights, automatic device mapping).
model_path = "ibm-granite/granite-guardian-3.1-8b"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="auto", torch_dtype=torch.bfloat16
)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}

# Render the conversation through the chat template and move the ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output decodes only what follows it

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives December 24, 1922 as the screening date; the context says July 16, 1964.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}

# Render the conversation through the chat template and move the ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output decodes only what follows it

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call below passes `video_id`, which is not one of the parameters declared in `tools`.
response_text = [
    {
        "name": "comment_list",
        "arguments": {"video_id": 456789123, "count": 15},
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}

# Render the conversation through the chat template and move the ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output decodes only what follows it

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings that parse_output compares (case-insensitively) with the decoded answer.
safe_token = "No"  # returned as the label when the answer equals this token
unsafe_token = "Yes"  # returned as the label when the answer equals this token
# Passed as `k` to torch.topk: number of top-scoring candidates kept per generated position.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            ``output.scores`` and ``output.sequences`` are read.
        input_len: Number of prompt tokens. Everything after this offset in
            ``output.sequences`` is decoded as the generated answer.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded answer matches
        neither. ``prob_of_risk`` is a Python float, or ``None`` when
        ``nlogprobs`` is not positive and no probability was computed
        (previously this case crashed on ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generated position except the last one.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of the (safe, unsafe) pair is the unsafe-token probability.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    normalized = answer.lower()
    if normalized == unsafe_token.lower():
        label = unsafe_token
    elif normalized == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate the (safe, unsafe) probability pair from top-k candidates.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generated
            position; row 0 of each result's ``values`` and ``indices`` is read.

    Returns:
        A 2-element tensor ``[p_safe, p_unsafe]`` obtained by a softmax over
        the logs of the two accumulated masses.
    """
    # Tiny non-zero seeds keep math.log defined when a token never shows up.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        scores = step.values.tolist()[0]
        token_ids = step.indices.tolist()[0]
        for score, token_id in zip(scores, token_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_token.lower():
                safe_mass += math.exp(score)
            if candidate == unsafe_token.lower():
                unsafe_mass += math.exp(score)

    log_pair = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_pair, dim=0)
# Load the tokenizer and the checkpoint (bfloat16 weights, automatic device mapping).
model_path = "ibm-granite/granite-guardian-3.1-8b"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="auto", torch_dtype=torch.bfloat16
)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}

# Render the conversation through the chat template and move the ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output decodes only what follows it

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives December 24, 1922 as the screening date; the context says July 16, 1964.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}

# Render the conversation through the chat template and move the ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output decodes only what follows it

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call below passes `video_id`, which is not one of the parameters declared in `tools`.
response_text = [
    {
        "name": "comment_list",
        "arguments": {"video_id": 456789123, "count": 15},
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}

# Render the conversation through the chat template and move the ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output decodes only what follows it

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings that parse_output compares (case-insensitively) with the decoded answer.
safe_token = "No"  # returned as the label when the answer equals this token
unsafe_token = "Yes"  # returned as the label when the answer equals this token
# Passed as `k` to torch.topk: number of top-scoring candidates kept per generated position.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            ``output.scores`` and ``output.sequences`` are read.
        input_len: Number of prompt tokens. Everything after this offset in
            ``output.sequences`` is decoded as the generated answer.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded answer matches
        neither. ``prob_of_risk`` is a Python float, or ``None`` when
        ``nlogprobs`` is not positive and no probability was computed
        (previously this case crashed on ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generated position except the last one.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of the (safe, unsafe) pair is the unsafe-token probability.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    normalized = answer.lower()
    if normalized == unsafe_token.lower():
        label = unsafe_token
    elif normalized == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate safe/unsafe verdict probabilities from top-k candidates.

    Args:
        logprobs: Iterable of `torch.topk` results, one per generation step.
            Only row 0 of each result's `values` and `indices` is read.

    Returns:
        A 1-D tensor `[p_safe, p_unsafe]`: a softmax over the logs of the two
        accumulated masses, so the entries sum to 1.
    """
    safe_target = safe_token.lower()
    unsafe_target = unsafe_token.lower()
    # Tiny non-zero floor keeps math.log() defined when a verdict token never
    # shows up among the candidates.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50
    for step in logprobs:
        values = step.values.tolist()[0]
        indices = step.indices.tolist()[0]
        for logprob, index in zip(values, indices):
            token = tokenizer.convert_ids_to_tokens(index).strip().lower()
            # Two independent checks, exactly as in the original code.
            if token == safe_target:
                safe_token_prob += math.exp(logprob)
            if token == unsafe_target:
                unsafe_token_prob += math.exp(logprob)
    return torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]),
        dim=0,
    )
# Checkpoint id; the tokenizer and the weights are both loaded from it.
model_path = "ibm-granite/granite-guardian-3.1-8b"
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Weights are requested as bfloat16 with device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="auto", torch_dtype=torch.bfloat16
)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to drop the prompt from the sequences.
input_len = input_ids.shape[1]

model.eval()
# Sampling is disabled and per-step scores are requested because
# parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives December 24, 1922 as the screening date, whereas the
# context gives July 16, 1964.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to drop the prompt from the sequences.
input_len = input_ids.shape[1]

model.eval()
# Sampling is disabled and per-step scores are requested because
# parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes `video_id`, while the tool declares `aweme_id`;
# presumably this mismatch is what the example expects to be flagged.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to drop the prompt from the sequences.
input_len = input_ids.shape[1]

model.eval()
# Sampling is disabled and per-step scores are requested because
# parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings compared against the decoded model output: the first marks
# "no risk", the second marks "risk detected".
safe_token, unsafe_token = "No", "Yes"
# How many top-scoring vocabulary entries are inspected per generated step;
# parse_output only estimates a probability when this is positive.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a `generate` result into a verdict label and a probability of risk.

    Args:
        output: Result of `model.generate(..., return_dict_in_generate=True,
            output_scores=True)`; its `scores` and `sequences` are read.
        input_len: Number of leading positions of `output.sequences` to drop
            before decoding the generated text.

    Returns:
        A `(label, prob_of_risk)` tuple. `label` is `unsafe_token`,
        `safe_token`, or "Failed" when the decoded text matches neither.
        `prob_of_risk` is a float, or None when `nlogprobs` is not positive
        and no estimate was computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # get_probabilities returns [safe, unsafe]; index 1 is the risk.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    generated = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    verdict = generated.lower()
    if verdict == unsafe_token.lower():
        label = unsafe_token
    elif verdict == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    # Previously `.item()` was called unconditionally here and raised
    # AttributeError on None whenever the estimate had been skipped.
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate safe/unsafe verdict probabilities from top-k candidates.

    Args:
        logprobs: Iterable of `torch.topk` results, one per generation step.
            Only row 0 of each result's `values` and `indices` is read.

    Returns:
        A 1-D tensor `[p_safe, p_unsafe]`: a softmax over the logs of the two
        accumulated masses, so the entries sum to 1.
    """
    safe_target = safe_token.lower()
    unsafe_target = unsafe_token.lower()
    # Tiny non-zero floor keeps math.log() defined when a verdict token never
    # shows up among the candidates.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50
    for step in logprobs:
        values = step.values.tolist()[0]
        indices = step.indices.tolist()[0]
        for logprob, index in zip(values, indices):
            token = tokenizer.convert_ids_to_tokens(index).strip().lower()
            # Two independent checks, exactly as in the original code.
            if token == safe_target:
                safe_token_prob += math.exp(logprob)
            if token == unsafe_target:
                unsafe_token_prob += math.exp(logprob)
    return torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]),
        dim=0,
    )
# Checkpoint id; the tokenizer and the weights are both loaded from it.
model_path = "ibm-granite/granite-guardian-3.1-8b"
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Weights are requested as bfloat16 with device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="auto", torch_dtype=torch.bfloat16
)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to drop the prompt from the sequences.
input_len = input_ids.shape[1]

model.eval()
# Sampling is disabled and per-step scores are requested because
# parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives December 24, 1922 as the screening date, whereas the
# context gives July 16, 1964.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to drop the prompt from the sequences.
input_len = input_ids.shape[1]

model.eval()
# Sampling is disabled and per-step scores are requested because
# parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes `video_id`, while the tool declares `aweme_id`;
# presumably this mismatch is what the example expects to be flagged.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to drop the prompt from the sequences.
input_len = input_ids.shape[1]

model.eval()
# Sampling is disabled and per-step scores are requested because
# parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings compared against the decoded model output: the first marks
# "no risk", the second marks "risk detected".
safe_token, unsafe_token = "No", "Yes"
# How many top-scoring vocabulary entries are inspected per generated step;
# parse_output only estimates a probability when this is positive.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a `generate` result into a verdict label and a probability of risk.

    Args:
        output: Result of `model.generate(..., return_dict_in_generate=True,
            output_scores=True)`; its `scores` and `sequences` are read.
        input_len: Number of leading positions of `output.sequences` to drop
            before decoding the generated text.

    Returns:
        A `(label, prob_of_risk)` tuple. `label` is `unsafe_token`,
        `safe_token`, or "Failed" when the decoded text matches neither.
        `prob_of_risk` is a float, or None when `nlogprobs` is not positive
        and no estimate was computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # get_probabilities returns [safe, unsafe]; index 1 is the risk.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    generated = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    verdict = generated.lower()
    if verdict == unsafe_token.lower():
        label = unsafe_token
    elif verdict == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    # Previously `.item()` was called unconditionally here and raised
    # AttributeError on None whenever the estimate had been skipped.
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate safe/unsafe verdict probabilities from top-k candidates.

    Args:
        logprobs: Iterable of `torch.topk` results, one per generation step.
            Only row 0 of each result's `values` and `indices` is read.

    Returns:
        A 1-D tensor `[p_safe, p_unsafe]`: a softmax over the logs of the two
        accumulated masses, so the entries sum to 1.
    """
    safe_target = safe_token.lower()
    unsafe_target = unsafe_token.lower()
    # Tiny non-zero floor keeps math.log() defined when a verdict token never
    # shows up among the candidates.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50
    for step in logprobs:
        values = step.values.tolist()[0]
        indices = step.indices.tolist()[0]
        for logprob, index in zip(values, indices):
            token = tokenizer.convert_ids_to_tokens(index).strip().lower()
            # Two independent checks, exactly as in the original code.
            if token == safe_target:
                safe_token_prob += math.exp(logprob)
            if token == unsafe_target:
                unsafe_token_prob += math.exp(logprob)
    return torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]),
        dim=0,
    )
# Checkpoint id; the tokenizer and the weights are both loaded from it.
model_path = "ibm-granite/granite-guardian-3.1-8b"
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Weights are requested as bfloat16 with device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="auto", torch_dtype=torch.bfloat16
)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to drop the prompt from the sequences.
input_len = input_ids.shape[1]

model.eval()
# Sampling is disabled and per-step scores are requested because
# parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives December 24, 1922 as the screening date, whereas the
# context gives July 16, 1964.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to drop the prompt from the sequences.
input_len = input_ids.shape[1]

model.eval()
# Sampling is disabled and per-step scores are requested because
# parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes `video_id`, while the tool declares `aweme_id`;
# presumably this mismatch is what the example expects to be flagged.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to drop the prompt from the sequences.
input_len = input_ids.shape[1]

model.eval()
# Sampling is disabled and per-step scores are requested because
# parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings compared against the decoded model output: the first marks
# "no risk", the second marks "risk detected".
safe_token, unsafe_token = "No", "Yes"
# How many top-scoring vocabulary entries are inspected per generated step;
# parse_output only estimates a probability when this is positive.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a `generate` result into a verdict label and a probability of risk.

    Args:
        output: Result of `model.generate(..., return_dict_in_generate=True,
            output_scores=True)`; its `scores` and `sequences` are read.
        input_len: Number of leading positions of `output.sequences` to drop
            before decoding the generated text.

    Returns:
        A `(label, prob_of_risk)` tuple. `label` is `unsafe_token`,
        `safe_token`, or "Failed" when the decoded text matches neither.
        `prob_of_risk` is a float, or None when `nlogprobs` is not positive
        and no estimate was computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # get_probabilities returns [safe, unsafe]; index 1 is the risk.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    generated = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    verdict = generated.lower()
    if verdict == unsafe_token.lower():
        label = unsafe_token
    elif verdict == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    # Previously `.item()` was called unconditionally here and raised
    # AttributeError on None whenever the estimate had been skipped.
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate safe/unsafe verdict probabilities from top-k candidates.

    Args:
        logprobs: Iterable of `torch.topk` results, one per generation step.
            Only row 0 of each result's `values` and `indices` is read.

    Returns:
        A 1-D tensor `[p_safe, p_unsafe]`: a softmax over the logs of the two
        accumulated masses, so the entries sum to 1.
    """
    safe_target = safe_token.lower()
    unsafe_target = unsafe_token.lower()
    # Tiny non-zero floor keeps math.log() defined when a verdict token never
    # shows up among the candidates.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50
    for step in logprobs:
        values = step.values.tolist()[0]
        indices = step.indices.tolist()[0]
        for logprob, index in zip(values, indices):
            token = tokenizer.convert_ids_to_tokens(index).strip().lower()
            # Two independent checks, exactly as in the original code.
            if token == safe_target:
                safe_token_prob += math.exp(logprob)
            if token == unsafe_target:
                unsafe_token_prob += math.exp(logprob)
    return torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]),
        dim=0,
    )
# Checkpoint id; the tokenizer and the weights are both loaded from it.
model_path = "ibm-granite/granite-guardian-3.1-8b"
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Weights are requested as bfloat16 with device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="auto", torch_dtype=torch.bfloat16
)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to drop the prompt from the sequences.
input_len = input_ids.shape[1]

model.eval()
# Sampling is disabled and per-step scores are requested because
# parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives December 24, 1922 as the screening date, whereas the
# context gives July 16, 1964.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to drop the prompt from the sequences.
input_len = input_ids.shape[1]

model.eval()
# Sampling is disabled and per-step scores are requested because
# parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes `video_id`, while the tool declares `aweme_id`;
# presumably this mismatch is what the example expects to be flagged.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to drop the prompt from the sequences.
input_len = input_ids.shape[1]

model.eval()
# Sampling is disabled and per-step scores are requested because
# parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings compared against the decoded model output: the first marks
# "no risk", the second marks "risk detected".
safe_token, unsafe_token = "No", "Yes"
# How many top-scoring vocabulary entries are inspected per generated step;
# parse_output only estimates a probability when this is positive.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a `generate` result into a verdict label and a probability of risk.

    Args:
        output: Result of `model.generate(..., return_dict_in_generate=True,
            output_scores=True)`; its `scores` and `sequences` are read.
        input_len: Number of leading positions of `output.sequences` to drop
            before decoding the generated text.

    Returns:
        A `(label, prob_of_risk)` tuple. `label` is `unsafe_token`,
        `safe_token`, or "Failed" when the decoded text matches neither.
        `prob_of_risk` is a float, or None when `nlogprobs` is not positive
        and no estimate was computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # get_probabilities returns [safe, unsafe]; index 1 is the risk.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    generated = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    verdict = generated.lower()
    if verdict == unsafe_token.lower():
        label = unsafe_token
    elif verdict == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    # Previously `.item()` was called unconditionally here and raised
    # AttributeError on None whenever the estimate had been skipped.
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate safe/unsafe verdict probabilities from top-k candidates.

    Args:
        logprobs: Iterable of `torch.topk` results, one per generation step.
            Only row 0 of each result's `values` and `indices` is read.

    Returns:
        A 1-D tensor `[p_safe, p_unsafe]`: a softmax over the logs of the two
        accumulated masses, so the entries sum to 1.
    """
    safe_target = safe_token.lower()
    unsafe_target = unsafe_token.lower()
    # Tiny non-zero floor keeps math.log() defined when a verdict token never
    # shows up among the candidates.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50
    for step in logprobs:
        values = step.values.tolist()[0]
        indices = step.indices.tolist()[0]
        for logprob, index in zip(values, indices):
            token = tokenizer.convert_ids_to_tokens(index).strip().lower()
            # Two independent checks, exactly as in the original code.
            if token == safe_target:
                safe_token_prob += math.exp(logprob)
            if token == unsafe_target:
                unsafe_token_prob += math.exp(logprob)
    return torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]),
        dim=0,
    )
# Checkpoint id; the tokenizer and the weights are both loaded from it.
model_path = "ibm-granite/granite-guardian-3.1-8b"
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Weights are requested as bfloat16 with device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="auto", torch_dtype=torch.bfloat16
)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to drop the prompt from the sequences.
input_len = input_ids.shape[1]

model.eval()
# Sampling is disabled and per-step scores are requested because
# parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives December 24, 1922 as the screening date, whereas the
# context gives July 16, 1964.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to drop the prompt from the sequences.
input_len = input_ids.shape[1]

model.eval()
# Sampling is disabled and per-step scores are requested because
# parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes `video_id`, while the tool declares `aweme_id`;
# presumably this mismatch is what the example expects to be flagged.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to drop the prompt from the sequences.
input_len = input_ids.shape[1]

model.eval()
# Sampling is disabled and per-step scores are requested because
# parse_output reads output.scores.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings compared against the decoded model output: the first marks
# "no risk", the second marks "risk detected".
safe_token, unsafe_token = "No", "Yes"
# How many top-scoring vocabulary entries are inspected per generated step;
# parse_output only estimates a probability when this is positive.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a `generate` result into a verdict label and a probability of risk.

    Args:
        output: Result of `model.generate(..., return_dict_in_generate=True,
            output_scores=True)`; its `scores` and `sequences` are read.
        input_len: Number of leading positions of `output.sequences` to drop
            before decoding the generated text.

    Returns:
        A `(label, prob_of_risk)` tuple. `label` is `unsafe_token`,
        `safe_token`, or "Failed" when the decoded text matches neither.
        `prob_of_risk` is a float, or None when `nlogprobs` is not positive
        and no estimate was computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # get_probabilities returns [safe, unsafe]; index 1 is the risk.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    generated = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    verdict = generated.lower()
    if verdict == unsafe_token.lower():
        label = unsafe_token
    elif verdict == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    # Previously `.item()` was called unconditionally here and raised
    # AttributeError on None whenever the estimate had been skipped.
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Score the safe and unsafe verdicts from per-step top-k candidates.

    Args:
        logprobs: Iterable of ``torch.topk`` results; the first row of each
            one's ``values`` and ``indices`` is read.

    Returns:
        A two-element tensor ``[safe, unsafe]`` that sums to 1.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny non-zero starting mass keeps math.log defined when a verdict
    # token never shows up among the candidates.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_values, step_ids):
            token = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token == safe_key:
                safe_mass += math.exp(score)
            if token == unsafe_key:
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Load the guardian checkpoint and its tokenizer from the same path.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="auto", torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}

# Render the conversation with the chat template and move the ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the prompt tokens.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives the screening date as December 24, 1922, whereas the context says July 16, 1964.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}

# Render the conversation with the chat template and move the ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the prompt tokens.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes `video_id`, while the tool spec above declares `aweme_id`.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}

# Render the conversation with the chat template and move the ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the prompt tokens.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings the guardian model is expected to generate; parse_output
# compares the decoded generation against them case-insensitively.
safe_token = "No"
unsafe_token = "Yes"
# Number of top-scoring candidates kept per generated token (the `k` passed to
# torch.topk in parse_output).
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Result of ``model.generate(..., return_dict_in_generate=True,
            output_scores=True)``; its ``sequences`` and ``scores`` are read.
        input_len: Number of prompt tokens, used to strip the prompt from
            ``output.sequences`` before decoding.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches neither.
        ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs`` is not
        positive and no candidate scores were collected (previously this
        case raised ``AttributeError`` on ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Keep the top candidates of every generation step except the last.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in output.scores[:-1]
        ]
        # Index 1 is the unsafe-token share returned by get_probabilities.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    generated = output.sequences[:, input_len:][0]
    res = tokenizer.decode(generated, skip_special_tokens=True).strip().lower()
    if res == unsafe_token.lower():
        label = unsafe_token
    elif res == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Score the safe and unsafe verdicts from per-step top-k candidates.

    Args:
        logprobs: Iterable of ``torch.topk`` results; the first row of each
            one's ``values`` and ``indices`` is read.

    Returns:
        A two-element tensor ``[safe, unsafe]`` that sums to 1.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny non-zero starting mass keeps math.log defined when a verdict
    # token never shows up among the candidates.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_values, step_ids):
            token = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token == safe_key:
                safe_mass += math.exp(score)
            if token == unsafe_key:
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Load the guardian checkpoint and its tokenizer from the same path.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="auto", torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}

# Render the conversation with the chat template and move the ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the prompt tokens.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives the screening date as December 24, 1922, whereas the context says July 16, 1964.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}

# Render the conversation with the chat template and move the ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the prompt tokens.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes `video_id`, while the tool spec above declares `aweme_id`.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}

# Render the conversation with the chat template and move the ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the prompt tokens.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings the guardian model is expected to generate; parse_output
# compares the decoded generation against them case-insensitively.
safe_token = "No"
unsafe_token = "Yes"
# Number of top-scoring candidates kept per generated token (the `k` passed to
# torch.topk in parse_output).
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Result of ``model.generate(..., return_dict_in_generate=True,
            output_scores=True)``; its ``sequences`` and ``scores`` are read.
        input_len: Number of prompt tokens, used to strip the prompt from
            ``output.sequences`` before decoding.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches neither.
        ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs`` is not
        positive and no candidate scores were collected (previously this
        case raised ``AttributeError`` on ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Keep the top candidates of every generation step except the last.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in output.scores[:-1]
        ]
        # Index 1 is the unsafe-token share returned by get_probabilities.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    generated = output.sequences[:, input_len:][0]
    res = tokenizer.decode(generated, skip_special_tokens=True).strip().lower()
    if res == unsafe_token.lower():
        label = unsafe_token
    elif res == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Score the safe and unsafe verdicts from per-step top-k candidates.

    Args:
        logprobs: Iterable of ``torch.topk`` results; the first row of each
            one's ``values`` and ``indices`` is read.

    Returns:
        A two-element tensor ``[safe, unsafe]`` that sums to 1.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny non-zero starting mass keeps math.log defined when a verdict
    # token never shows up among the candidates.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_values, step_ids):
            token = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token == safe_key:
                safe_mass += math.exp(score)
            if token == unsafe_key:
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Load the guardian checkpoint and its tokenizer from the same path.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="auto", torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}

# Render the conversation with the chat template and move the ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the prompt tokens.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives the screening date as December 24, 1922, whereas the context says July 16, 1964.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}

# Render the conversation with the chat template and move the ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the prompt tokens.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes `video_id`, while the tool spec above declares `aweme_id`.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}

# Render the conversation with the chat template and move the ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the prompt tokens.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings the guardian model is expected to generate; parse_output
# compares the decoded generation against them case-insensitively.
safe_token = "No"
unsafe_token = "Yes"
# Number of top-scoring candidates kept per generated token (the `k` passed to
# torch.topk in parse_output).
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Result of ``model.generate(..., return_dict_in_generate=True,
            output_scores=True)``; its ``sequences`` and ``scores`` are read.
        input_len: Number of prompt tokens, used to strip the prompt from
            ``output.sequences`` before decoding.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches neither.
        ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs`` is not
        positive and no candidate scores were collected (previously this
        case raised ``AttributeError`` on ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Keep the top candidates of every generation step except the last.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in output.scores[:-1]
        ]
        # Index 1 is the unsafe-token share returned by get_probabilities.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    generated = output.sequences[:, input_len:][0]
    res = tokenizer.decode(generated, skip_special_tokens=True).strip().lower()
    if res == unsafe_token.lower():
        label = unsafe_token
    elif res == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Score the safe and unsafe verdicts from per-step top-k candidates.

    Args:
        logprobs: Iterable of ``torch.topk`` results; the first row of each
            one's ``values`` and ``indices`` is read.

    Returns:
        A two-element tensor ``[safe, unsafe]`` that sums to 1.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny non-zero starting mass keeps math.log defined when a verdict
    # token never shows up among the candidates.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_values, step_ids):
            token = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token == safe_key:
                safe_mass += math.exp(score)
            if token == unsafe_key:
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Load the guardian checkpoint and its tokenizer from the same path.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="auto", torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}

# Render the conversation with the chat template and move the ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the prompt tokens.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives the screening date as December 24, 1922, whereas the context says July 16, 1964.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}

# Render the conversation with the chat template and move the ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the prompt tokens.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes `video_id`, while the tool spec above declares `aweme_id`.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}

# Render the conversation with the chat template and move the ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the prompt tokens.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings the guardian model is expected to generate; parse_output
# compares the decoded generation against them case-insensitively.
safe_token = "No"
unsafe_token = "Yes"
# Number of top-scoring candidates kept per generated token (the `k` passed to
# torch.topk in parse_output).
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Result of ``model.generate(..., return_dict_in_generate=True,
            output_scores=True)``; its ``sequences`` and ``scores`` are read.
        input_len: Number of prompt tokens, used to strip the prompt from
            ``output.sequences`` before decoding.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches neither.
        ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs`` is not
        positive and no candidate scores were collected (previously this
        case raised ``AttributeError`` on ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Keep the top candidates of every generation step except the last.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in output.scores[:-1]
        ]
        # Index 1 is the unsafe-token share returned by get_probabilities.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    generated = output.sequences[:, input_len:][0]
    res = tokenizer.decode(generated, skip_special_tokens=True).strip().lower()
    if res == unsafe_token.lower():
        label = unsafe_token
    elif res == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Score the safe and unsafe verdicts from per-step top-k candidates.

    Args:
        logprobs: Iterable of ``torch.topk`` results; the first row of each
            one's ``values`` and ``indices`` is read.

    Returns:
        A two-element tensor ``[safe, unsafe]`` that sums to 1.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny non-zero starting mass keeps math.log defined when a verdict
    # token never shows up among the candidates.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_values, step_ids):
            token = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token == safe_key:
                safe_mass += math.exp(score)
            if token == unsafe_key:
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Load the guardian checkpoint and its tokenizer from the same path.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="auto", torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}

# Render the conversation with the chat template and move the ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the prompt tokens.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives the screening date as December 24, 1922, whereas the context says July 16, 1964.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}

# Render the conversation with the chat template and move the ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the prompt tokens.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes `video_id`, while the tool spec above declares `aweme_id`.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}

# Render the conversation with the chat template and move the ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length; parse_output uses it to slice off the prompt tokens.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings the guardian model is expected to generate; parse_output
# compares the decoded generation against them case-insensitively.
safe_token = "No"
unsafe_token = "Yes"
# Number of top-scoring candidates kept per generated token (the `k` passed to
# torch.topk in parse_output).
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Result of ``model.generate(..., return_dict_in_generate=True,
            output_scores=True)``; its ``sequences`` and ``scores`` are read.
        input_len: Number of prompt tokens, used to strip the prompt from
            ``output.sequences`` before decoding.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches neither.
        ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs`` is not
        positive and no candidate scores were collected (previously this
        case raised ``AttributeError`` on ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Keep the top candidates of every generation step except the last.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in output.scores[:-1]
        ]
        # Index 1 is the unsafe-token share returned by get_probabilities.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    generated = output.sequences[:, input_len:][0]
    res = tokenizer.decode(generated, skip_special_tokens=True).strip().lower()
    if res == unsafe_token.lower():
        label = unsafe_token
    elif res == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Compute the safe/unsafe split from per-step top-k candidates.

    Args:
        logprobs: Iterable of per-step ``torch.topk`` results; only the first
            row of each step's ``values`` and ``indices`` is read.

    Returns:
        A 2-element tensor ``[safe, unsafe]``: a softmax over the logs of the
        two accumulated ``exp(value)`` sums.
    """
    # Tiny floors keep math.log defined when a token never shows up.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for value, token_id in zip(step_values, step_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_token.lower():
                safe_mass += math.exp(value)
            if candidate == unsafe_token.lower():
                unsafe_mass += math.exp(value)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint identifier handed to both from_pretrained calls.
model_path = "ibm-granite/granite-guardian-3.1-8b"
tokenizer = AutoTokenizer.from_pretrained(model_path)
# bfloat16 weights; device placement is delegated to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="auto", torch_dtype=torch.bfloat16
)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length, so parse_output can slice off the generated continuation.
input_len = input_ids.shape[1]
model.eval()
# Inference only: generation runs with gradient tracking disabled.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response below gives a screening date that differs from the one in the context.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length, so parse_output can slice off the generated continuation.
input_len = input_ids.shape[1]
model.eval()
# Inference only: generation runs with gradient tracking disabled.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE: the call passes "video_id" while the schema above declares "aweme_id";
# presumably this mismatch is what the example is meant to flag.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length, so parse_output can slice off the generated continuation.
input_len = input_ids.shape[1]
model.eval()
# Inference only: generation runs with gradient tracking disabled.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings: parse_output labels a generation by comparing the decoded
# text against these, case-insensitively. "Yes" is the risk-present answer.
safe_token = "No"
unsafe_token = "Yes"
# Number of top-scoring candidates kept per generation step (k for torch.topk).
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``; its
            ``sequences`` and ``scores`` attributes are read.
        input_len: Number of prompt tokens; everything after this position in
            ``output.sequences`` is decoded as the generated answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches neither.
        ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs`` is not
        positive and therefore no probability was computed (previously this
        case raised ``AttributeError``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The final generation step is left out, exactly as before.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of get_probabilities' result is the unsafe ("risk") share.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    generated_ids = output.sequences[:, input_len:][0]
    res = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Compute the safe/unsafe split from per-step top-k candidates.

    Args:
        logprobs: Iterable of per-step ``torch.topk`` results; only the first
            row of each step's ``values`` and ``indices`` is read.

    Returns:
        A 2-element tensor ``[safe, unsafe]``: a softmax over the logs of the
        two accumulated ``exp(value)`` sums.
    """
    # Tiny floors keep math.log defined when a token never shows up.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for value, token_id in zip(step_values, step_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_token.lower():
                safe_mass += math.exp(value)
            if candidate == unsafe_token.lower():
                unsafe_mass += math.exp(value)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint identifier handed to both from_pretrained calls.
model_path = "ibm-granite/granite-guardian-3.1-8b"
tokenizer = AutoTokenizer.from_pretrained(model_path)
# bfloat16 weights; device placement is delegated to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="auto", torch_dtype=torch.bfloat16
)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length, so parse_output can slice off the generated continuation.
input_len = input_ids.shape[1]
model.eval()
# Inference only: generation runs with gradient tracking disabled.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response below gives a screening date that differs from the one in the context.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length, so parse_output can slice off the generated continuation.
input_len = input_ids.shape[1]
model.eval()
# Inference only: generation runs with gradient tracking disabled.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE: the call passes "video_id" while the schema above declares "aweme_id";
# presumably this mismatch is what the example is meant to flag.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length, so parse_output can slice off the generated continuation.
input_len = input_ids.shape[1]
model.eval()
# Inference only: generation runs with gradient tracking disabled.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings: parse_output labels a generation by comparing the decoded
# text against these, case-insensitively. "Yes" is the risk-present answer.
safe_token = "No"
unsafe_token = "Yes"
# Number of top-scoring candidates kept per generation step (k for torch.topk).
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``; its
            ``sequences`` and ``scores`` attributes are read.
        input_len: Number of prompt tokens; everything after this position in
            ``output.sequences`` is decoded as the generated answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches neither.
        ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs`` is not
        positive and therefore no probability was computed (previously this
        case raised ``AttributeError``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The final generation step is left out, exactly as before.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of get_probabilities' result is the unsafe ("risk") share.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    generated_ids = output.sequences[:, input_len:][0]
    res = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Compute the safe/unsafe split from per-step top-k candidates.

    Args:
        logprobs: Iterable of per-step ``torch.topk`` results; only the first
            row of each step's ``values`` and ``indices`` is read.

    Returns:
        A 2-element tensor ``[safe, unsafe]``: a softmax over the logs of the
        two accumulated ``exp(value)`` sums.
    """
    # Tiny floors keep math.log defined when a token never shows up.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for value, token_id in zip(step_values, step_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_token.lower():
                safe_mass += math.exp(value)
            if candidate == unsafe_token.lower():
                unsafe_mass += math.exp(value)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint identifier handed to both from_pretrained calls.
model_path = "ibm-granite/granite-guardian-3.1-8b"
tokenizer = AutoTokenizer.from_pretrained(model_path)
# bfloat16 weights; device placement is delegated to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="auto", torch_dtype=torch.bfloat16
)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length, so parse_output can slice off the generated continuation.
input_len = input_ids.shape[1]
model.eval()
# Inference only: generation runs with gradient tracking disabled.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response below gives a screening date that differs from the one in the context.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length, so parse_output can slice off the generated continuation.
input_len = input_ids.shape[1]
model.eval()
# Inference only: generation runs with gradient tracking disabled.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE: the call passes "video_id" while the schema above declares "aweme_id";
# presumably this mismatch is what the example is meant to flag.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length, so parse_output can slice off the generated continuation.
input_len = input_ids.shape[1]
model.eval()
# Inference only: generation runs with gradient tracking disabled.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings: parse_output labels a generation by comparing the decoded
# text against these, case-insensitively. "Yes" is the risk-present answer.
safe_token = "No"
unsafe_token = "Yes"
# Number of top-scoring candidates kept per generation step (k for torch.topk).
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``; its
            ``sequences`` and ``scores`` attributes are read.
        input_len: Number of prompt tokens; everything after this position in
            ``output.sequences`` is decoded as the generated answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches neither.
        ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs`` is not
        positive and therefore no probability was computed (previously this
        case raised ``AttributeError``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The final generation step is left out, exactly as before.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of get_probabilities' result is the unsafe ("risk") share.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    generated_ids = output.sequences[:, input_len:][0]
    res = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Compute the safe/unsafe split from per-step top-k candidates.

    Args:
        logprobs: Iterable of per-step ``torch.topk`` results; only the first
            row of each step's ``values`` and ``indices`` is read.

    Returns:
        A 2-element tensor ``[safe, unsafe]``: a softmax over the logs of the
        two accumulated ``exp(value)`` sums.
    """
    # Tiny floors keep math.log defined when a token never shows up.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for value, token_id in zip(step_values, step_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_token.lower():
                safe_mass += math.exp(value)
            if candidate == unsafe_token.lower():
                unsafe_mass += math.exp(value)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint identifier handed to both from_pretrained calls.
model_path = "ibm-granite/granite-guardian-3.1-8b"
tokenizer = AutoTokenizer.from_pretrained(model_path)
# bfloat16 weights; device placement is delegated to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="auto", torch_dtype=torch.bfloat16
)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length, so parse_output can slice off the generated continuation.
input_len = input_ids.shape[1]
model.eval()
# Inference only: generation runs with gradient tracking disabled.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response below gives a screening date that differs from the one in the context.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length, so parse_output can slice off the generated continuation.
input_len = input_ids.shape[1]
model.eval()
# Inference only: generation runs with gradient tracking disabled.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE: the call passes "video_id" while the schema above declares "aweme_id";
# presumably this mismatch is what the example is meant to flag.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length, so parse_output can slice off the generated continuation.
input_len = input_ids.shape[1]
model.eval()
# Inference only: generation runs with gradient tracking disabled.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings: parse_output labels a generation by comparing the decoded
# text against these, case-insensitively. "Yes" is the risk-present answer.
safe_token = "No"
unsafe_token = "Yes"
# Number of top-scoring candidates kept per generation step (k for torch.topk).
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``; its
            ``sequences`` and ``scores`` attributes are read.
        input_len: Number of prompt tokens; everything after this position in
            ``output.sequences`` is decoded as the generated answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches neither.
        ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs`` is not
        positive and therefore no probability was computed (previously this
        case raised ``AttributeError``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The final generation step is left out, exactly as before.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of get_probabilities' result is the unsafe ("risk") share.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    generated_ids = output.sequences[:, input_len:][0]
    res = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Compute the safe/unsafe split from per-step top-k candidates.

    Args:
        logprobs: Iterable of per-step ``torch.topk`` results; only the first
            row of each step's ``values`` and ``indices`` is read.

    Returns:
        A 2-element tensor ``[safe, unsafe]``: a softmax over the logs of the
        two accumulated ``exp(value)`` sums.
    """
    # Tiny floors keep math.log defined when a token never shows up.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for value, token_id in zip(step_values, step_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_token.lower():
                safe_mass += math.exp(value)
            if candidate == unsafe_token.lower():
                unsafe_mass += math.exp(value)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint identifier handed to both from_pretrained calls.
model_path = "ibm-granite/granite-guardian-3.1-8b"
tokenizer = AutoTokenizer.from_pretrained(model_path)
# bfloat16 weights; device placement is delegated to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="auto", torch_dtype=torch.bfloat16
)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length, so parse_output can slice off the generated continuation.
input_len = input_ids.shape[1]
model.eval()
# Inference only: generation runs with gradient tracking disabled.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response below gives a screening date that differs from the one in the context.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length, so parse_output can slice off the generated continuation.
input_len = input_ids.shape[1]
model.eval()
# Inference only: generation runs with gradient tracking disabled.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE: the call passes "video_id" while the schema above declares "aweme_id";
# presumably this mismatch is what the example is meant to flag.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length, so parse_output can slice off the generated continuation.
input_len = input_ids.shape[1]
model.eval()
# Inference only: generation runs with gradient tracking disabled.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings: parse_output labels a generation by comparing the decoded
# text against these, case-insensitively. "Yes" is the risk-present answer.
safe_token = "No"
unsafe_token = "Yes"
# Number of top-scoring candidates kept per generation step (k for torch.topk).
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``; its
            ``sequences`` and ``scores`` attributes are read.
        input_len: Number of prompt tokens; everything after this position in
            ``output.sequences`` is decoded as the generated answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches neither.
        ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs`` is not
        positive and therefore no probability was computed (previously this
        case raised ``AttributeError``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The final generation step is left out, exactly as before.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of get_probabilities' result is the unsafe ("risk") share.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    generated_ids = output.sequences[:, input_len:][0]
    res = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate the relative probability of the safe and unsafe answers.

    Args:
        logprobs: Iterable of per-step top-k results exposing ``values`` and
            ``indices``; only row 0 of each is read.

    Returns:
        A two-element tensor ``[p_safe, p_unsafe]``: a softmax over the logs
        of the accumulated safe and unsafe masses.
    """
    # Start from a tiny positive mass so math.log never receives zero.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_values, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_token.lower():
                safe_mass += math.exp(score)
            if token_text == unsafe_token.lower():
                unsafe_mass += math.exp(score)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Load the guardian model and its tokenizer.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Inference only: switch to eval mode once instead of before every generate call.
model.eval()


def _assess_risk(messages, guardian_config):
    """Score one conversation with the guardian model.

    Applies the tokenizer's chat template with ``guardian_config``, generates
    with sampling disabled and gradients off, and passes the result to
    ``parse_output``.

    Args:
        messages: List of ``{"role": ..., "content": ...}`` dicts.
        guardian_config: Dict forwarded to ``apply_chat_template``.

    Returns:
        The ``(label, prob_of_risk)`` pair produced by ``parse_output``.
    """
    input_ids = tokenizer.apply_chat_template(
        messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    # Prompt length; parse_output uses it to slice off the prompt tokens.
    input_len = input_ids.shape[1]
    with torch.no_grad():
        output = model.generate(
            input_ids,
            do_sample=False,
            max_new_tokens=20,
            return_dict_in_generate=True,
            output_scores=True,
        )
    return parse_output(output, input_len)


# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes "video_id" while the schema above declares "aweme_id";
# presumably this is the mismatch the example is meant to flag; confirm.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Literal answers that parse_output compares (case-insensitively) with the model's decoded reply.
safe_token = "No"
unsafe_token = "Yes"
# k passed to torch.topk for every generation step; parse_output only estimates a probability when it is > 0.
nlogprobs = 20
def parse_output(output, input_len):
    """Convert a ``model.generate`` result into ``(label, prob_of_risk)``.

    Args:
        output: Result of ``model.generate``; its ``scores`` and ``sequences``
            attributes are read.
        input_len: Number of prompt tokens; everything before this offset in
            ``output.sequences`` is dropped before decoding.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches neither.
        ``prob_of_risk`` is a Python float, or ``None`` when ``nlogprobs`` is
        not positive (previously that case raised ``AttributeError``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The final generation step is skipped (presumably the end-of-sequence step; TODO confirm).
        top_k_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 is the unsafe ("risk") entry returned by get_probabilities.
        prob_of_risk = get_probabilities(top_k_per_step)[1].item()

    # Decode only the newly generated tokens of the first sequence.
    res = tokenizer.decode(output.sequences[:, input_len:][0], skip_special_tokens=True).strip()
    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate the relative probability of the safe and unsafe answers.

    Args:
        logprobs: Iterable of per-step top-k results exposing ``values`` and
            ``indices``; only row 0 of each is read.

    Returns:
        A two-element tensor ``[p_safe, p_unsafe]``: a softmax over the logs
        of the accumulated safe and unsafe masses.
    """
    # Start from a tiny positive mass so math.log never receives zero.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_values, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_token.lower():
                safe_mass += math.exp(score)
            if token_text == unsafe_token.lower():
                unsafe_mass += math.exp(score)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Load the guardian model and its tokenizer.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Inference only: switch to eval mode once instead of before every generate call.
model.eval()


def _assess_risk(messages, guardian_config):
    """Score one conversation with the guardian model.

    Applies the tokenizer's chat template with ``guardian_config``, generates
    with sampling disabled and gradients off, and passes the result to
    ``parse_output``.

    Args:
        messages: List of ``{"role": ..., "content": ...}`` dicts.
        guardian_config: Dict forwarded to ``apply_chat_template``.

    Returns:
        The ``(label, prob_of_risk)`` pair produced by ``parse_output``.
    """
    input_ids = tokenizer.apply_chat_template(
        messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    # Prompt length; parse_output uses it to slice off the prompt tokens.
    input_len = input_ids.shape[1]
    with torch.no_grad():
        output = model.generate(
            input_ids,
            do_sample=False,
            max_new_tokens=20,
            return_dict_in_generate=True,
            output_scores=True,
        )
    return parse_output(output, input_len)


# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes "video_id" while the schema above declares "aweme_id";
# presumably this is the mismatch the example is meant to flag; confirm.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Literal answers that parse_output compares (case-insensitively) with the model's decoded reply.
safe_token = "No"
unsafe_token = "Yes"
# k passed to torch.topk for every generation step; parse_output only estimates a probability when it is > 0.
nlogprobs = 20
def parse_output(output, input_len):
    """Convert a ``model.generate`` result into ``(label, prob_of_risk)``.

    Args:
        output: Result of ``model.generate``; its ``scores`` and ``sequences``
            attributes are read.
        input_len: Number of prompt tokens; everything before this offset in
            ``output.sequences`` is dropped before decoding.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches neither.
        ``prob_of_risk`` is a Python float, or ``None`` when ``nlogprobs`` is
        not positive (previously that case raised ``AttributeError``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The final generation step is skipped (presumably the end-of-sequence step; TODO confirm).
        top_k_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 is the unsafe ("risk") entry returned by get_probabilities.
        prob_of_risk = get_probabilities(top_k_per_step)[1].item()

    # Decode only the newly generated tokens of the first sequence.
    res = tokenizer.decode(output.sequences[:, input_len:][0], skip_special_tokens=True).strip()
    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate the relative probability of the safe and unsafe answers.

    Args:
        logprobs: Iterable of per-step top-k results exposing ``values`` and
            ``indices``; only row 0 of each is read.

    Returns:
        A two-element tensor ``[p_safe, p_unsafe]``: a softmax over the logs
        of the accumulated safe and unsafe masses.
    """
    # Start from a tiny positive mass so math.log never receives zero.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_values, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_token.lower():
                safe_mass += math.exp(score)
            if token_text == unsafe_token.lower():
                unsafe_mass += math.exp(score)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Load the guardian model and its tokenizer.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Inference only: switch to eval mode once instead of before every generate call.
model.eval()


def _assess_risk(messages, guardian_config):
    """Score one conversation with the guardian model.

    Applies the tokenizer's chat template with ``guardian_config``, generates
    with sampling disabled and gradients off, and passes the result to
    ``parse_output``.

    Args:
        messages: List of ``{"role": ..., "content": ...}`` dicts.
        guardian_config: Dict forwarded to ``apply_chat_template``.

    Returns:
        The ``(label, prob_of_risk)`` pair produced by ``parse_output``.
    """
    input_ids = tokenizer.apply_chat_template(
        messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    # Prompt length; parse_output uses it to slice off the prompt tokens.
    input_len = input_ids.shape[1]
    with torch.no_grad():
        output = model.generate(
            input_ids,
            do_sample=False,
            max_new_tokens=20,
            return_dict_in_generate=True,
            output_scores=True,
        )
    return parse_output(output, input_len)


# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes "video_id" while the schema above declares "aweme_id";
# presumably this is the mismatch the example is meant to flag; confirm.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Literal answers that parse_output compares (case-insensitively) with the model's decoded reply.
safe_token = "No"
unsafe_token = "Yes"
# k passed to torch.topk for every generation step; parse_output only estimates a probability when it is > 0.
nlogprobs = 20
def parse_output(output, input_len):
    """Convert a ``model.generate`` result into ``(label, prob_of_risk)``.

    Args:
        output: Result of ``model.generate``; its ``scores`` and ``sequences``
            attributes are read.
        input_len: Number of prompt tokens; everything before this offset in
            ``output.sequences`` is dropped before decoding.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches neither.
        ``prob_of_risk`` is a Python float, or ``None`` when ``nlogprobs`` is
        not positive (previously that case raised ``AttributeError``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The final generation step is skipped (presumably the end-of-sequence step; TODO confirm).
        top_k_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 is the unsafe ("risk") entry returned by get_probabilities.
        prob_of_risk = get_probabilities(top_k_per_step)[1].item()

    # Decode only the newly generated tokens of the first sequence.
    res = tokenizer.decode(output.sequences[:, input_len:][0], skip_special_tokens=True).strip()
    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate the relative probability of the safe and unsafe answers.

    Args:
        logprobs: Iterable of per-step top-k results exposing ``values`` and
            ``indices``; only row 0 of each is read.

    Returns:
        A two-element tensor ``[p_safe, p_unsafe]``: a softmax over the logs
        of the accumulated safe and unsafe masses.
    """
    # Start from a tiny positive mass so math.log never receives zero.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_values, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_token.lower():
                safe_mass += math.exp(score)
            if token_text == unsafe_token.lower():
                unsafe_mass += math.exp(score)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Load the guardian model and its tokenizer.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Inference only: switch to eval mode once instead of before every generate call.
model.eval()


def _assess_risk(messages, guardian_config):
    """Score one conversation with the guardian model.

    Applies the tokenizer's chat template with ``guardian_config``, generates
    with sampling disabled and gradients off, and passes the result to
    ``parse_output``.

    Args:
        messages: List of ``{"role": ..., "content": ...}`` dicts.
        guardian_config: Dict forwarded to ``apply_chat_template``.

    Returns:
        The ``(label, prob_of_risk)`` pair produced by ``parse_output``.
    """
    input_ids = tokenizer.apply_chat_template(
        messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    # Prompt length; parse_output uses it to slice off the prompt tokens.
    input_len = input_ids.shape[1]
    with torch.no_grad():
        output = model.generate(
            input_ids,
            do_sample=False,
            max_new_tokens=20,
            return_dict_in_generate=True,
            output_scores=True,
        )
    return parse_output(output, input_len)


# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes "video_id" while the schema above declares "aweme_id";
# presumably this is the mismatch the example is meant to flag; confirm.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Literal answers that parse_output compares (case-insensitively) with the model's decoded reply.
safe_token = "No"
unsafe_token = "Yes"
# k passed to torch.topk for every generation step; parse_output only estimates a probability when it is > 0.
nlogprobs = 20
def parse_output(output, input_len):
    """Convert a ``model.generate`` result into ``(label, prob_of_risk)``.

    Args:
        output: Result of ``model.generate``; its ``scores`` and ``sequences``
            attributes are read.
        input_len: Number of prompt tokens; everything before this offset in
            ``output.sequences`` is dropped before decoding.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches neither.
        ``prob_of_risk`` is a Python float, or ``None`` when ``nlogprobs`` is
        not positive (previously that case raised ``AttributeError``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The final generation step is skipped (presumably the end-of-sequence step; TODO confirm).
        top_k_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 is the unsafe ("risk") entry returned by get_probabilities.
        prob_of_risk = get_probabilities(top_k_per_step)[1].item()

    # Decode only the newly generated tokens of the first sequence.
    res = tokenizer.decode(output.sequences[:, input_len:][0], skip_special_tokens=True).strip()
    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate the relative probability of the safe and unsafe answers.

    Args:
        logprobs: Iterable of per-step top-k results exposing ``values`` and
            ``indices``; only row 0 of each is read.

    Returns:
        A two-element tensor ``[p_safe, p_unsafe]``: a softmax over the logs
        of the accumulated safe and unsafe masses.
    """
    # Start from a tiny positive mass so math.log never receives zero.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_values, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_token.lower():
                safe_mass += math.exp(score)
            if token_text == unsafe_token.lower():
                unsafe_mass += math.exp(score)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Load the guardian model and its tokenizer.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Inference only: switch to eval mode once instead of before every generate call.
model.eval()


def _assess_risk(messages, guardian_config):
    """Score one conversation with the guardian model.

    Applies the tokenizer's chat template with ``guardian_config``, generates
    with sampling disabled and gradients off, and passes the result to
    ``parse_output``.

    Args:
        messages: List of ``{"role": ..., "content": ...}`` dicts.
        guardian_config: Dict forwarded to ``apply_chat_template``.

    Returns:
        The ``(label, prob_of_risk)`` pair produced by ``parse_output``.
    """
    input_ids = tokenizer.apply_chat_template(
        messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    # Prompt length; parse_output uses it to slice off the prompt tokens.
    input_len = input_ids.shape[1]
    with torch.no_grad():
        output = model.generate(
            input_ids,
            do_sample=False,
            max_new_tokens=20,
            return_dict_in_generate=True,
            output_scores=True,
        )
    return parse_output(output, input_len)


# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes "video_id" while the schema above declares "aweme_id";
# presumably this is the mismatch the example is meant to flag; confirm.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Literal answers that parse_output compares (case-insensitively) with the model's decoded reply.
safe_token = "No"
unsafe_token = "Yes"
# k passed to torch.topk for every generation step; parse_output only estimates a probability when it is > 0.
nlogprobs = 20
def parse_output(output, input_len):
    """Convert a ``model.generate`` result into ``(label, prob_of_risk)``.

    Args:
        output: Result of ``model.generate``; its ``scores`` and ``sequences``
            attributes are read.
        input_len: Number of prompt tokens; everything before this offset in
            ``output.sequences`` is dropped before decoding.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches neither.
        ``prob_of_risk`` is a Python float, or ``None`` when ``nlogprobs`` is
        not positive (previously that case raised ``AttributeError``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The final generation step is skipped (presumably the end-of-sequence step; TODO confirm).
        top_k_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 is the unsafe ("risk") entry returned by get_probabilities.
        prob_of_risk = get_probabilities(top_k_per_step)[1].item()

    # Decode only the newly generated tokens of the first sequence.
    res = tokenizer.decode(output.sequences[:, input_len:][0], skip_special_tokens=True).strip()
    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Compute the safe/unsafe probability pair from per-step top-k results.

    Args:
        logprobs: Iterable of ``torch.topk`` results; only row 0 of each
            item's ``.values`` and ``.indices`` is read.

    Returns:
        1-D tensor ``[p_safe, p_unsafe]`` produced by a two-way softmax.
    """
    safe_target = safe_token.lower()
    unsafe_target = unsafe_token.lower()
    # Tiny floor keeps math.log() defined when a label never appears.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50
    for gen_token_i in logprobs:
        step_values = gen_token_i.values.tolist()[0]
        step_indices = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(step_values, step_indices):
            decoded_token = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if decoded_token == safe_target:
                safe_token_prob += math.exp(logprob)
            if decoded_token == unsafe_target:
                unsafe_token_prob += math.exp(logprob)

    # Softmax over the logs normalises the two accumulated masses to sum to 1.
    probabilities = torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]), dim=0
    )
    return probabilities
# Checkpoint identifier shared by the model and the tokenizer.
model_path = "ibm-granite/granite-guardian-3.1-8b"

# Load the causal LM in bfloat16 with automatic device placement.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
# Tokenizer from the same checkpoint; it also applies the chat template.
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length; parse_output() uses it to slice off the prompt tokens.
input_len = input_ids.shape[1]

model.eval()

# generate() has to be indented under no_grad(); at column 0 the `with`
# statement has no body and the script does not parse.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives Mekas's birth date as the screening date, which the context contradicts.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length; parse_output() uses it to slice off the prompt tokens.
input_len = input_ids.shape[1]

model.eval()

# generate() has to be indented under no_grad(); at column 0 the `with`
# statement has no body and the script does not parse.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes "video_id" although the tool declares "aweme_id";
# presumably this mismatch is the risk the example is meant to trigger.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]

messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length; parse_output() uses it to slice off the prompt tokens.
input_len = input_ids.shape[1]

model.eval()

# generate() has to be indented under no_grad(); at column 0 the `with`
# statement has no body and the script does not parse.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990

# --- Quickstart Example (python, transformers) ---
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings compared (case-insensitively) against the decoded model
# output: "No" is the safe label, "Yes" the risk label.
safe_token = "No"
unsafe_token = "Yes"
# k handed to torch.topk for each generation step in parse_output().
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a ``(label, prob_of_risk)`` pair.

    Args:
        output: Object exposing ``scores`` and ``sequences`` (the usage code
            obtains it from ``model.generate(..., return_dict_in_generate=True,
            output_scores=True)``).
        input_len: Number of leading prompt tokens to drop from
            ``output.sequences`` before decoding.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches neither.
        ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs <= 0``.
    """
    label, prob_of_risk = None, None

    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        # NOTE(review): transformers documents ``scores`` as processed logits,
        # while get_probabilities() exponentiates them as log-probs -- confirm.
        list_index_logprobs_i = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        prob = get_probabilities(list_index_logprobs_i)
        prob_of_risk = prob[1]

    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    if unsafe_token.lower() == res.lower():
        label = unsafe_token
    elif safe_token.lower() == res.lower():
        label = safe_token
    else:
        label = "Failed"

    # prob_of_risk is still None when top-k scoring was skipped; calling
    # .item() on it would raise AttributeError.
    return label, (prob_of_risk.item() if prob_of_risk is not None else None)
def get_probabilities(logprobs):
    """Compute the safe/unsafe probability pair from per-step top-k results.

    Args:
        logprobs: Iterable of ``torch.topk`` results; only row 0 of each
            item's ``.values`` and ``.indices`` is read.

    Returns:
        1-D tensor ``[p_safe, p_unsafe]`` produced by a two-way softmax.
    """
    safe_target = safe_token.lower()
    unsafe_target = unsafe_token.lower()
    # Tiny floor keeps math.log() defined when a label never appears.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50
    for gen_token_i in logprobs:
        step_values = gen_token_i.values.tolist()[0]
        step_indices = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(step_values, step_indices):
            decoded_token = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if decoded_token == safe_target:
                safe_token_prob += math.exp(logprob)
            if decoded_token == unsafe_target:
                unsafe_token_prob += math.exp(logprob)

    # Softmax over the logs normalises the two accumulated masses to sum to 1.
    probabilities = torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]), dim=0
    )
    return probabilities
# Checkpoint identifier shared by the model and the tokenizer.
model_path = "ibm-granite/granite-guardian-3.1-8b"

# Load the causal LM in bfloat16 with automatic device placement.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
# Tokenizer from the same checkpoint; it also applies the chat template.
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length; parse_output() uses it to slice off the prompt tokens.
input_len = input_ids.shape[1]

model.eval()

# generate() has to be indented under no_grad(); at column 0 the `with`
# statement has no body and the script does not parse.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives Mekas's birth date as the screening date, which the context contradicts.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length; parse_output() uses it to slice off the prompt tokens.
input_len = input_ids.shape[1]

model.eval()

# generate() has to be indented under no_grad(); at column 0 the `with`
# statement has no body and the script does not parse.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes "video_id" although the tool declares "aweme_id";
# presumably this mismatch is the risk the example is meant to trigger.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]

messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length; parse_output() uses it to slice off the prompt tokens.
input_len = input_ids.shape[1]

model.eval()

# generate() has to be indented under no_grad(); at column 0 the `with`
# statement has no body and the script does not parse.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990

# --- Quickstart Example (python, transformers) ---
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings compared (case-insensitively) against the decoded model
# output: "No" is the safe label, "Yes" the risk label.
safe_token = "No"
unsafe_token = "Yes"
# k handed to torch.topk for each generation step in parse_output().
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a ``(label, prob_of_risk)`` pair.

    Args:
        output: Object exposing ``scores`` and ``sequences`` (the usage code
            obtains it from ``model.generate(..., return_dict_in_generate=True,
            output_scores=True)``).
        input_len: Number of leading prompt tokens to drop from
            ``output.sequences`` before decoding.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches neither.
        ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs <= 0``.
    """
    label, prob_of_risk = None, None

    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        # NOTE(review): transformers documents ``scores`` as processed logits,
        # while get_probabilities() exponentiates them as log-probs -- confirm.
        list_index_logprobs_i = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        prob = get_probabilities(list_index_logprobs_i)
        prob_of_risk = prob[1]

    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    if unsafe_token.lower() == res.lower():
        label = unsafe_token
    elif safe_token.lower() == res.lower():
        label = safe_token
    else:
        label = "Failed"

    # prob_of_risk is still None when top-k scoring was skipped; calling
    # .item() on it would raise AttributeError.
    return label, (prob_of_risk.item() if prob_of_risk is not None else None)
def get_probabilities(logprobs):
    """Compute the safe/unsafe probability pair from per-step top-k results.

    Args:
        logprobs: Iterable of ``torch.topk`` results; only row 0 of each
            item's ``.values`` and ``.indices`` is read.

    Returns:
        1-D tensor ``[p_safe, p_unsafe]`` produced by a two-way softmax.
    """
    safe_target = safe_token.lower()
    unsafe_target = unsafe_token.lower()
    # Tiny floor keeps math.log() defined when a label never appears.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50
    for gen_token_i in logprobs:
        step_values = gen_token_i.values.tolist()[0]
        step_indices = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(step_values, step_indices):
            decoded_token = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if decoded_token == safe_target:
                safe_token_prob += math.exp(logprob)
            if decoded_token == unsafe_target:
                unsafe_token_prob += math.exp(logprob)

    # Softmax over the logs normalises the two accumulated masses to sum to 1.
    probabilities = torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]), dim=0
    )
    return probabilities
# Checkpoint identifier shared by the model and the tokenizer.
model_path = "ibm-granite/granite-guardian-3.1-8b"

# Load the causal LM in bfloat16 with automatic device placement.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
# Tokenizer from the same checkpoint; it also applies the chat template.
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length; parse_output() uses it to slice off the prompt tokens.
input_len = input_ids.shape[1]

model.eval()

# generate() has to be indented under no_grad(); at column 0 the `with`
# statement has no body and the script does not parse.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives Mekas's birth date as the screening date, which the context contradicts.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length; parse_output() uses it to slice off the prompt tokens.
input_len = input_ids.shape[1]

model.eval()

# generate() has to be indented under no_grad(); at column 0 the `with`
# statement has no body and the script does not parse.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes "video_id" although the tool declares "aweme_id";
# presumably this mismatch is the risk the example is meant to trigger.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]

messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length; parse_output() uses it to slice off the prompt tokens.
input_len = input_ids.shape[1]

model.eval()

# generate() has to be indented under no_grad(); at column 0 the `with`
# statement has no body and the script does not parse.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990

# --- Quickstart Example (python, transformers) ---
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings compared (case-insensitively) against the decoded model
# output: "No" is the safe label, "Yes" the risk label.
safe_token = "No"
unsafe_token = "Yes"
# k handed to torch.topk for each generation step in parse_output().
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a ``(label, prob_of_risk)`` pair.

    Args:
        output: Object exposing ``scores`` and ``sequences`` (the usage code
            obtains it from ``model.generate(..., return_dict_in_generate=True,
            output_scores=True)``).
        input_len: Number of leading prompt tokens to drop from
            ``output.sequences`` before decoding.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches neither.
        ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs <= 0``.
    """
    label, prob_of_risk = None, None

    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        # NOTE(review): transformers documents ``scores`` as processed logits,
        # while get_probabilities() exponentiates them as log-probs -- confirm.
        list_index_logprobs_i = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        prob = get_probabilities(list_index_logprobs_i)
        prob_of_risk = prob[1]

    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    if unsafe_token.lower() == res.lower():
        label = unsafe_token
    elif safe_token.lower() == res.lower():
        label = safe_token
    else:
        label = "Failed"

    # prob_of_risk is still None when top-k scoring was skipped; calling
    # .item() on it would raise AttributeError.
    return label, (prob_of_risk.item() if prob_of_risk is not None else None)
def get_probabilities(logprobs):
    """Compute the safe/unsafe probability pair from per-step top-k results.

    Args:
        logprobs: Iterable of ``torch.topk`` results; only row 0 of each
            item's ``.values`` and ``.indices`` is read.

    Returns:
        1-D tensor ``[p_safe, p_unsafe]`` produced by a two-way softmax.
    """
    safe_target = safe_token.lower()
    unsafe_target = unsafe_token.lower()
    # Tiny floor keeps math.log() defined when a label never appears.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50
    for gen_token_i in logprobs:
        step_values = gen_token_i.values.tolist()[0]
        step_indices = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(step_values, step_indices):
            decoded_token = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if decoded_token == safe_target:
                safe_token_prob += math.exp(logprob)
            if decoded_token == unsafe_target:
                unsafe_token_prob += math.exp(logprob)

    # Softmax over the logs normalises the two accumulated masses to sum to 1.
    probabilities = torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]), dim=0
    )
    return probabilities
# Checkpoint identifier shared by the model and the tokenizer.
model_path = "ibm-granite/granite-guardian-3.1-8b"

# Load the causal LM in bfloat16 with automatic device placement.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
# Tokenizer from the same checkpoint; it also applies the chat template.
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length; parse_output() uses it to slice off the prompt tokens.
input_len = input_ids.shape[1]

model.eval()

# generate() has to be indented under no_grad(); at column 0 the `with`
# statement has no body and the script does not parse.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives Mekas's birth date as the screening date, which the context contradicts.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length; parse_output() uses it to slice off the prompt tokens.
input_len = input_ids.shape[1]

model.eval()

# generate() has to be indented under no_grad(); at column 0 the `with`
# statement has no body and the script does not parse.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes "video_id" although the tool declares "aweme_id";
# presumably this mismatch is the risk the example is meant to trigger.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]

messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length; parse_output() uses it to slice off the prompt tokens.
input_len = input_ids.shape[1]

model.eval()

# generate() has to be indented under no_grad(); at column 0 the `with`
# statement has no body and the script does not parse.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990

# --- Quickstart Example (python, transformers) ---
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings compared (case-insensitively) against the decoded model
# output: "No" is the safe label, "Yes" the risk label.
safe_token = "No"
unsafe_token = "Yes"
# k handed to torch.topk for each generation step in parse_output().
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a ``(label, prob_of_risk)`` pair.

    Args:
        output: Object exposing ``scores`` and ``sequences`` (the usage code
            obtains it from ``model.generate(..., return_dict_in_generate=True,
            output_scores=True)``).
        input_len: Number of leading prompt tokens to drop from
            ``output.sequences`` before decoding.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches neither.
        ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs <= 0``.
    """
    label, prob_of_risk = None, None

    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        # NOTE(review): transformers documents ``scores`` as processed logits,
        # while get_probabilities() exponentiates them as log-probs -- confirm.
        list_index_logprobs_i = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        prob = get_probabilities(list_index_logprobs_i)
        prob_of_risk = prob[1]

    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    if unsafe_token.lower() == res.lower():
        label = unsafe_token
    elif safe_token.lower() == res.lower():
        label = safe_token
    else:
        label = "Failed"

    # prob_of_risk is still None when top-k scoring was skipped; calling
    # .item() on it would raise AttributeError.
    return label, (prob_of_risk.item() if prob_of_risk is not None else None)
def get_probabilities(logprobs):
    """Compute the safe/unsafe probability pair from per-step top-k results.

    Args:
        logprobs: Iterable of ``torch.topk`` results; only row 0 of each
            item's ``.values`` and ``.indices`` is read.

    Returns:
        1-D tensor ``[p_safe, p_unsafe]`` produced by a two-way softmax.
    """
    safe_target = safe_token.lower()
    unsafe_target = unsafe_token.lower()
    # Tiny floor keeps math.log() defined when a label never appears.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50
    for gen_token_i in logprobs:
        step_values = gen_token_i.values.tolist()[0]
        step_indices = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(step_values, step_indices):
            decoded_token = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if decoded_token == safe_target:
                safe_token_prob += math.exp(logprob)
            if decoded_token == unsafe_target:
                unsafe_token_prob += math.exp(logprob)

    # Softmax over the logs normalises the two accumulated masses to sum to 1.
    probabilities = torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]), dim=0
    )
    return probabilities
# Checkpoint identifier shared by the model and the tokenizer.
model_path = "ibm-granite/granite-guardian-3.1-8b"

# Load the causal LM in bfloat16 with automatic device placement.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
# Tokenizer from the same checkpoint; it also applies the chat template.
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length; parse_output() uses it to slice off the prompt tokens.
input_len = input_ids.shape[1]

model.eval()

# generate() has to be indented under no_grad(); at column 0 the `with`
# statement has no body and the script does not parse.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives Mekas's birth date as the screening date, which the context contradicts.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length; parse_output() uses it to slice off the prompt tokens.
input_len = input_ids.shape[1]

model.eval()

# generate() has to be indented under no_grad(); at column 0 the `with`
# statement has no body and the script does not parse.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes "video_id" although the tool declares "aweme_id";
# presumably this mismatch is the risk the example is meant to trigger.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]

messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length; parse_output() uses it to slice off the prompt tokens.
input_len = input_ids.shape[1]

model.eval()

# generate() has to be indented under no_grad(); at column 0 the `with`
# statement has no body and the script does not parse.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990

# --- Quickstart Example (python, transformers) ---
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings compared (case-insensitively) against the decoded model
# output: "No" is the safe label, "Yes" the risk label.
safe_token = "No"
unsafe_token = "Yes"
# k handed to torch.topk for each generation step in parse_output().
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a ``(label, prob_of_risk)`` pair.

    Args:
        output: Object exposing ``scores`` and ``sequences`` (the usage code
            obtains it from ``model.generate(..., return_dict_in_generate=True,
            output_scores=True)``).
        input_len: Number of leading prompt tokens to drop from
            ``output.sequences`` before decoding.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches neither.
        ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs <= 0``.
    """
    label, prob_of_risk = None, None

    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        # NOTE(review): transformers documents ``scores`` as processed logits,
        # while get_probabilities() exponentiates them as log-probs -- confirm.
        list_index_logprobs_i = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        prob = get_probabilities(list_index_logprobs_i)
        prob_of_risk = prob[1]

    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    if unsafe_token.lower() == res.lower():
        label = unsafe_token
    elif safe_token.lower() == res.lower():
        label = safe_token
    else:
        label = "Failed"

    # prob_of_risk is still None when top-k scoring was skipped; calling
    # .item() on it would raise AttributeError.
    return label, (prob_of_risk.item() if prob_of_risk is not None else None)
def get_probabilities(logprobs):
    """Aggregate candidate scores into a two-way safe/unsafe distribution.

    Args:
        logprobs: Iterable of per-step top-k results exposing ``.values``
            and ``.indices``; only row 0 of each is read.
            NOTE(review): values are exponentiated as log-probabilities,
            while ``parse_output`` passes top-k of ``output.scores`` --
            confirm those scores are on a log-probability scale.

    Returns:
        A 1-D tensor ``[safe, unsafe]`` produced by a softmax, so the two
        entries sum to 1.
    """
    # Tiny non-zero floor so math.log() below stays defined when a label
    # token never shows up among the candidates.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50

    for gen_token_i in logprobs:
        step_values = gen_token_i.values.tolist()[0]
        step_indices = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(step_values, step_indices):
            # Normalise the candidate once instead of once per comparison.
            token_text = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if token_text == safe_token.lower():
                safe_token_prob += math.exp(logprob)
            if token_text == unsafe_token.lower():
                unsafe_token_prob += math.exp(logprob)

    return torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]),
        dim=0,
    )
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
model_path,
device_map="auto",
torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
# Usage 1 run: render `messages` through the chat template for the risk named
# in `guardian_config` and move the resulting ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output slices past it

model.eval()

with torch.no_grad():
    # Sampling is disabled; per-step scores are requested for parse_output.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
# Usage 2 run: render `messages` through the chat template for the risk named
# in `guardian_config` and move the resulting ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output slices past it

model.eval()

with torch.no_grad():
    # Sampling is disabled; per-step scores are requested for parse_output.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
{
"name": "comment_list",
"description": "Fetches a list of comments for a specified IBM video using the given API.",
"parameters": {
"aweme_id": {
"description": "The ID of the IBM video.",
"type": "int",
"default": "7178094165614464282"
},
"cursor": {
"description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
"type": "int, optional",
"default": "0"
},
"count": {
"description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
"type": "int, optional",
"default": "20"
}
}
}
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
response_text = [
{
"name": "comment_list",
"arguments": {
"video_id": 456789123,
"count": 15
}
}
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
# Usage 3 run: render `messages` through the chat template for the risk named
# in `guardian_config` and move the resulting ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output slices past it

model.eval()

with torch.no_grad():
    # Sampling is disabled; per-step scores are requested for parse_output.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
safe_token = "No"
unsafe_token = "Yes"
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens, used to slice the newly
            generated tokens out of ``output.sequences``.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches
        neither. ``prob_of_risk`` is a Python float, or ``None`` when
        ``nlogprobs`` is not positive and no probability was computed.
    """
    label, prob_of_risk = None, None

    if nlogprobs > 0:
        # Top-k candidates for every generated step except the last one
        # (presumably the end-of-sequence step -- TODO confirm).
        list_index_logprobs_i = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        prob = get_probabilities(list_index_logprobs_i)
        prob_of_risk = prob[1]

    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    if unsafe_token.lower() == res.lower():
        label = unsafe_token
    elif safe_token.lower() == res.lower():
        label = safe_token
    else:
        label = "Failed"

    # Fix: .item() used to be called unconditionally and raised
    # AttributeError whenever nlogprobs <= 0 left prob_of_risk as None.
    return label, (prob_of_risk.item() if prob_of_risk is not None else None)
def get_probabilities(logprobs):
    """Aggregate candidate scores into a two-way safe/unsafe distribution.

    Args:
        logprobs: Iterable of per-step top-k results exposing ``.values``
            and ``.indices``; only row 0 of each is read.
            NOTE(review): values are exponentiated as log-probabilities,
            while ``parse_output`` passes top-k of ``output.scores`` --
            confirm those scores are on a log-probability scale.

    Returns:
        A 1-D tensor ``[safe, unsafe]`` produced by a softmax, so the two
        entries sum to 1.
    """
    # Tiny non-zero floor so math.log() below stays defined when a label
    # token never shows up among the candidates.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50

    for gen_token_i in logprobs:
        step_values = gen_token_i.values.tolist()[0]
        step_indices = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(step_values, step_indices):
            # Normalise the candidate once instead of once per comparison.
            token_text = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if token_text == safe_token.lower():
                safe_token_prob += math.exp(logprob)
            if token_text == unsafe_token.lower():
                unsafe_token_prob += math.exp(logprob)

    return torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]),
        dim=0,
    )
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
model_path,
device_map="auto",
torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
# Usage 1 run: render `messages` through the chat template for the risk named
# in `guardian_config` and move the resulting ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output slices past it

model.eval()

with torch.no_grad():
    # Sampling is disabled; per-step scores are requested for parse_output.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
# Usage 2 run: render `messages` through the chat template for the risk named
# in `guardian_config` and move the resulting ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output slices past it

model.eval()

with torch.no_grad():
    # Sampling is disabled; per-step scores are requested for parse_output.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
{
"name": "comment_list",
"description": "Fetches a list of comments for a specified IBM video using the given API.",
"parameters": {
"aweme_id": {
"description": "The ID of the IBM video.",
"type": "int",
"default": "7178094165614464282"
},
"cursor": {
"description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
"type": "int, optional",
"default": "0"
},
"count": {
"description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
"type": "int, optional",
"default": "20"
}
}
}
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
response_text = [
{
"name": "comment_list",
"arguments": {
"video_id": 456789123,
"count": 15
}
}
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
# Usage 3 run: render `messages` through the chat template for the risk named
# in `guardian_config` and move the resulting ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output slices past it

model.eval()

with torch.no_grad():
    # Sampling is disabled; per-step scores are requested for parse_output.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
safe_token = "No"
unsafe_token = "Yes"
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens, used to slice the newly
            generated tokens out of ``output.sequences``.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches
        neither. ``prob_of_risk`` is a Python float, or ``None`` when
        ``nlogprobs`` is not positive and no probability was computed.
    """
    label, prob_of_risk = None, None

    if nlogprobs > 0:
        # Top-k candidates for every generated step except the last one
        # (presumably the end-of-sequence step -- TODO confirm).
        list_index_logprobs_i = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        prob = get_probabilities(list_index_logprobs_i)
        prob_of_risk = prob[1]

    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    if unsafe_token.lower() == res.lower():
        label = unsafe_token
    elif safe_token.lower() == res.lower():
        label = safe_token
    else:
        label = "Failed"

    # Fix: .item() used to be called unconditionally and raised
    # AttributeError whenever nlogprobs <= 0 left prob_of_risk as None.
    return label, (prob_of_risk.item() if prob_of_risk is not None else None)
def get_probabilities(logprobs):
    """Aggregate candidate scores into a two-way safe/unsafe distribution.

    Args:
        logprobs: Iterable of per-step top-k results exposing ``.values``
            and ``.indices``; only row 0 of each is read.
            NOTE(review): values are exponentiated as log-probabilities,
            while ``parse_output`` passes top-k of ``output.scores`` --
            confirm those scores are on a log-probability scale.

    Returns:
        A 1-D tensor ``[safe, unsafe]`` produced by a softmax, so the two
        entries sum to 1.
    """
    # Tiny non-zero floor so math.log() below stays defined when a label
    # token never shows up among the candidates.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50

    for gen_token_i in logprobs:
        step_values = gen_token_i.values.tolist()[0]
        step_indices = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(step_values, step_indices):
            # Normalise the candidate once instead of once per comparison.
            token_text = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if token_text == safe_token.lower():
                safe_token_prob += math.exp(logprob)
            if token_text == unsafe_token.lower():
                unsafe_token_prob += math.exp(logprob)

    return torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]),
        dim=0,
    )
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
model_path,
device_map="auto",
torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
# Usage 1 run: render `messages` through the chat template for the risk named
# in `guardian_config` and move the resulting ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output slices past it

model.eval()

with torch.no_grad():
    # Sampling is disabled; per-step scores are requested for parse_output.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
# Usage 2 run: render `messages` through the chat template for the risk named
# in `guardian_config` and move the resulting ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output slices past it

model.eval()

with torch.no_grad():
    # Sampling is disabled; per-step scores are requested for parse_output.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
{
"name": "comment_list",
"description": "Fetches a list of comments for a specified IBM video using the given API.",
"parameters": {
"aweme_id": {
"description": "The ID of the IBM video.",
"type": "int",
"default": "7178094165614464282"
},
"cursor": {
"description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
"type": "int, optional",
"default": "0"
},
"count": {
"description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
"type": "int, optional",
"default": "20"
}
}
}
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
response_text = [
{
"name": "comment_list",
"arguments": {
"video_id": 456789123,
"count": 15
}
}
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
# Usage 3 run: render `messages` through the chat template for the risk named
# in `guardian_config` and move the resulting ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output slices past it

model.eval()

with torch.no_grad():
    # Sampling is disabled; per-step scores are requested for parse_output.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
safe_token = "No"
unsafe_token = "Yes"
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens, used to slice the newly
            generated tokens out of ``output.sequences``.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches
        neither. ``prob_of_risk`` is a Python float, or ``None`` when
        ``nlogprobs`` is not positive and no probability was computed.
    """
    label, prob_of_risk = None, None

    if nlogprobs > 0:
        # Top-k candidates for every generated step except the last one
        # (presumably the end-of-sequence step -- TODO confirm).
        list_index_logprobs_i = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        prob = get_probabilities(list_index_logprobs_i)
        prob_of_risk = prob[1]

    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    if unsafe_token.lower() == res.lower():
        label = unsafe_token
    elif safe_token.lower() == res.lower():
        label = safe_token
    else:
        label = "Failed"

    # Fix: .item() used to be called unconditionally and raised
    # AttributeError whenever nlogprobs <= 0 left prob_of_risk as None.
    return label, (prob_of_risk.item() if prob_of_risk is not None else None)
def get_probabilities(logprobs):
    """Aggregate candidate scores into a two-way safe/unsafe distribution.

    Args:
        logprobs: Iterable of per-step top-k results exposing ``.values``
            and ``.indices``; only row 0 of each is read.
            NOTE(review): values are exponentiated as log-probabilities,
            while ``parse_output`` passes top-k of ``output.scores`` --
            confirm those scores are on a log-probability scale.

    Returns:
        A 1-D tensor ``[safe, unsafe]`` produced by a softmax, so the two
        entries sum to 1.
    """
    # Tiny non-zero floor so math.log() below stays defined when a label
    # token never shows up among the candidates.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50

    for gen_token_i in logprobs:
        step_values = gen_token_i.values.tolist()[0]
        step_indices = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(step_values, step_indices):
            # Normalise the candidate once instead of once per comparison.
            token_text = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if token_text == safe_token.lower():
                safe_token_prob += math.exp(logprob)
            if token_text == unsafe_token.lower():
                unsafe_token_prob += math.exp(logprob)

    return torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]),
        dim=0,
    )
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
model_path,
device_map="auto",
torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
# Usage 1 run: render `messages` through the chat template for the risk named
# in `guardian_config` and move the resulting ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output slices past it

model.eval()

with torch.no_grad():
    # Sampling is disabled; per-step scores are requested for parse_output.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
# Usage 2 run: render `messages` through the chat template for the risk named
# in `guardian_config` and move the resulting ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output slices past it

model.eval()

with torch.no_grad():
    # Sampling is disabled; per-step scores are requested for parse_output.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
{
"name": "comment_list",
"description": "Fetches a list of comments for a specified IBM video using the given API.",
"parameters": {
"aweme_id": {
"description": "The ID of the IBM video.",
"type": "int",
"default": "7178094165614464282"
},
"cursor": {
"description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
"type": "int, optional",
"default": "0"
},
"count": {
"description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
"type": "int, optional",
"default": "20"
}
}
}
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
response_text = [
{
"name": "comment_list",
"arguments": {
"video_id": 456789123,
"count": 15
}
}
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
# Usage 3 run: render `messages` through the chat template for the risk named
# in `guardian_config` and move the resulting ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output slices past it

model.eval()

with torch.no_grad():
    # Sampling is disabled; per-step scores are requested for parse_output.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
safe_token = "No"
unsafe_token = "Yes"
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens, used to slice the newly
            generated tokens out of ``output.sequences``.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches
        neither. ``prob_of_risk`` is a Python float, or ``None`` when
        ``nlogprobs`` is not positive and no probability was computed.
    """
    label, prob_of_risk = None, None

    if nlogprobs > 0:
        # Top-k candidates for every generated step except the last one
        # (presumably the end-of-sequence step -- TODO confirm).
        list_index_logprobs_i = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        prob = get_probabilities(list_index_logprobs_i)
        prob_of_risk = prob[1]

    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    if unsafe_token.lower() == res.lower():
        label = unsafe_token
    elif safe_token.lower() == res.lower():
        label = safe_token
    else:
        label = "Failed"

    # Fix: .item() used to be called unconditionally and raised
    # AttributeError whenever nlogprobs <= 0 left prob_of_risk as None.
    return label, (prob_of_risk.item() if prob_of_risk is not None else None)
def get_probabilities(logprobs):
    """Aggregate candidate scores into a two-way safe/unsafe distribution.

    Args:
        logprobs: Iterable of per-step top-k results exposing ``.values``
            and ``.indices``; only row 0 of each is read.
            NOTE(review): values are exponentiated as log-probabilities,
            while ``parse_output`` passes top-k of ``output.scores`` --
            confirm those scores are on a log-probability scale.

    Returns:
        A 1-D tensor ``[safe, unsafe]`` produced by a softmax, so the two
        entries sum to 1.
    """
    # Tiny non-zero floor so math.log() below stays defined when a label
    # token never shows up among the candidates.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50

    for gen_token_i in logprobs:
        step_values = gen_token_i.values.tolist()[0]
        step_indices = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(step_values, step_indices):
            # Normalise the candidate once instead of once per comparison.
            token_text = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if token_text == safe_token.lower():
                safe_token_prob += math.exp(logprob)
            if token_text == unsafe_token.lower():
                unsafe_token_prob += math.exp(logprob)

    return torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]),
        dim=0,
    )
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
model_path,
device_map="auto",
torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
# Usage 1 run: render `messages` through the chat template for the risk named
# in `guardian_config` and move the resulting ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output slices past it

model.eval()

with torch.no_grad():
    # Sampling is disabled; per-step scores are requested for parse_output.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
# Usage 2 run: render `messages` through the chat template for the risk named
# in `guardian_config` and move the resulting ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output slices past it

model.eval()

with torch.no_grad():
    # Sampling is disabled; per-step scores are requested for parse_output.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
{
"name": "comment_list",
"description": "Fetches a list of comments for a specified IBM video using the given API.",
"parameters": {
"aweme_id": {
"description": "The ID of the IBM video.",
"type": "int",
"default": "7178094165614464282"
},
"cursor": {
"description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
"type": "int, optional",
"default": "0"
},
"count": {
"description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
"type": "int, optional",
"default": "20"
}
}
}
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
response_text = [
{
"name": "comment_list",
"arguments": {
"video_id": 456789123,
"count": 15
}
}
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
# Usage 3 run: render `messages` through the chat template for the risk named
# in `guardian_config` and move the resulting ids to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output slices past it

model.eval()

with torch.no_grad():
    # Sampling is disabled; per-step scores are requested for parse_output.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
safe_token = "No"
unsafe_token = "Yes"
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens, used to slice the newly
            generated tokens out of ``output.sequences``.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches
        neither. ``prob_of_risk`` is a Python float, or ``None`` when
        ``nlogprobs`` is not positive and no probability was computed.
    """
    label, prob_of_risk = None, None

    if nlogprobs > 0:
        # Top-k candidates for every generated step except the last one
        # (presumably the end-of-sequence step -- TODO confirm).
        list_index_logprobs_i = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        prob = get_probabilities(list_index_logprobs_i)
        prob_of_risk = prob[1]

    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    if unsafe_token.lower() == res.lower():
        label = unsafe_token
    elif safe_token.lower() == res.lower():
        label = safe_token
    else:
        label = "Failed"

    # Fix: .item() used to be called unconditionally and raised
    # AttributeError whenever nlogprobs <= 0 left prob_of_risk as None.
    return label, (prob_of_risk.item() if prob_of_risk is not None else None)
def get_probabilities(logprobs):
    """Return the normalised safe/unsafe probabilities for a generation.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generation
            step. Only row ``[0]`` of each result is read (assumes a batch
            of one -- TODO confirm). The values are exponentiated as if
            they were log-probabilities.

    Returns:
        A 1-D tensor ``[p_safe, p_unsafe]`` produced by a softmax over the
        two accumulated log-masses.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a token never shows up.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for value, token_id in zip(step_values, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_key:
                safe_mass += math.exp(value)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(value)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
model_path,
device_map="auto",
torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
# Inference only: no gradients are needed while generating.
with torch.no_grad():
    # Scores are requested because parse_output reads output.scores.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
# Inference only: no gradients are needed while generating.
with torch.no_grad():
    # Scores are requested because parse_output reads output.scores.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
{
"name": "comment_list",
"description": "Fetches a list of comments for a specified IBM video using the given API.",
"parameters": {
"aweme_id": {
"description": "The ID of the IBM video.",
"type": "int",
"default": "7178094165614464282"
},
"cursor": {
"description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
"type": "int, optional",
"default": "0"
},
"count": {
"description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
"type": "int, optional",
"default": "20"
}
}
}
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
response_text = [
{
"name": "comment_list",
"arguments": {
"video_id": 456789123,
"count": 15
}
}
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
# Inference only: no gradients are needed while generating.
with torch.no_grad():
    # Scores are requested because parse_output reads output.scores.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
safe_token = "No"
unsafe_token = "Yes"
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a risk label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens. Everything after this position
            in ``output.sequences`` is decoded as the model's answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded answer matches
        neither. ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs``
        is not positive and therefore no probability was computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one
        # (presumably the end-of-sequence step -- TODO confirm).
        top_k_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 is the unsafe ("risk") entry returned by get_probabilities.
        prob_of_risk = get_probabilities(top_k_per_step)[1].item()

    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    if answer.lower() == unsafe_token.lower():
        label = unsafe_token
    elif answer.lower() == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    # Previously ``prob_of_risk.item()`` was called unconditionally, which
    # raised AttributeError whenever the probability branch was skipped.
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Return the normalised safe/unsafe probabilities for a generation.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generation
            step. Only row ``[0]`` of each result is read (assumes a batch
            of one -- TODO confirm). The values are exponentiated as if
            they were log-probabilities.

    Returns:
        A 1-D tensor ``[p_safe, p_unsafe]`` produced by a softmax over the
        two accumulated log-masses.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a token never shows up.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for value, token_id in zip(step_values, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_key:
                safe_mass += math.exp(value)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(value)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
model_path,
device_map="auto",
torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
# Inference only: no gradients are needed while generating.
with torch.no_grad():
    # Scores are requested because parse_output reads output.scores.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
# Inference only: no gradients are needed while generating.
with torch.no_grad():
    # Scores are requested because parse_output reads output.scores.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
{
"name": "comment_list",
"description": "Fetches a list of comments for a specified IBM video using the given API.",
"parameters": {
"aweme_id": {
"description": "The ID of the IBM video.",
"type": "int",
"default": "7178094165614464282"
},
"cursor": {
"description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
"type": "int, optional",
"default": "0"
},
"count": {
"description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
"type": "int, optional",
"default": "20"
}
}
}
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
response_text = [
{
"name": "comment_list",
"arguments": {
"video_id": 456789123,
"count": 15
}
}
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
# Inference only: no gradients are needed while generating.
with torch.no_grad():
    # Scores are requested because parse_output reads output.scores.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
safe_token = "No"
unsafe_token = "Yes"
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a risk label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens. Everything after this position
            in ``output.sequences`` is decoded as the model's answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded answer matches
        neither. ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs``
        is not positive and therefore no probability was computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one
        # (presumably the end-of-sequence step -- TODO confirm).
        top_k_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 is the unsafe ("risk") entry returned by get_probabilities.
        prob_of_risk = get_probabilities(top_k_per_step)[1].item()

    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    if answer.lower() == unsafe_token.lower():
        label = unsafe_token
    elif answer.lower() == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    # Previously ``prob_of_risk.item()`` was called unconditionally, which
    # raised AttributeError whenever the probability branch was skipped.
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Return the normalised safe/unsafe probabilities for a generation.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generation
            step. Only row ``[0]`` of each result is read (assumes a batch
            of one -- TODO confirm). The values are exponentiated as if
            they were log-probabilities.

    Returns:
        A 1-D tensor ``[p_safe, p_unsafe]`` produced by a softmax over the
        two accumulated log-masses.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a token never shows up.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for value, token_id in zip(step_values, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_key:
                safe_mass += math.exp(value)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(value)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
model_path,
device_map="auto",
torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
# Inference only: no gradients are needed while generating.
with torch.no_grad():
    # Scores are requested because parse_output reads output.scores.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
# Inference only: no gradients are needed while generating.
with torch.no_grad():
    # Scores are requested because parse_output reads output.scores.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
{
"name": "comment_list",
"description": "Fetches a list of comments for a specified IBM video using the given API.",
"parameters": {
"aweme_id": {
"description": "The ID of the IBM video.",
"type": "int",
"default": "7178094165614464282"
},
"cursor": {
"description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
"type": "int, optional",
"default": "0"
},
"count": {
"description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
"type": "int, optional",
"default": "20"
}
}
}
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
response_text = [
{
"name": "comment_list",
"arguments": {
"video_id": 456789123,
"count": 15
}
}
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
# Inference only: no gradients are needed while generating.
with torch.no_grad():
    # Scores are requested because parse_output reads output.scores.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
safe_token = "No"
unsafe_token = "Yes"
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a risk label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens. Everything after this position
            in ``output.sequences`` is decoded as the model's answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded answer matches
        neither. ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs``
        is not positive and therefore no probability was computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one
        # (presumably the end-of-sequence step -- TODO confirm).
        top_k_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 is the unsafe ("risk") entry returned by get_probabilities.
        prob_of_risk = get_probabilities(top_k_per_step)[1].item()

    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    if answer.lower() == unsafe_token.lower():
        label = unsafe_token
    elif answer.lower() == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    # Previously ``prob_of_risk.item()`` was called unconditionally, which
    # raised AttributeError whenever the probability branch was skipped.
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Return the normalised safe/unsafe probabilities for a generation.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generation
            step. Only row ``[0]`` of each result is read (assumes a batch
            of one -- TODO confirm). The values are exponentiated as if
            they were log-probabilities.

    Returns:
        A 1-D tensor ``[p_safe, p_unsafe]`` produced by a softmax over the
        two accumulated log-masses.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a token never shows up.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for value, token_id in zip(step_values, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_key:
                safe_mass += math.exp(value)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(value)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
model_path,
device_map="auto",
torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
# Inference only: no gradients are needed while generating.
with torch.no_grad():
    # Scores are requested because parse_output reads output.scores.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
# Inference only: no gradients are needed while generating.
with torch.no_grad():
    # Scores are requested because parse_output reads output.scores.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
{
"name": "comment_list",
"description": "Fetches a list of comments for a specified IBM video using the given API.",
"parameters": {
"aweme_id": {
"description": "The ID of the IBM video.",
"type": "int",
"default": "7178094165614464282"
},
"cursor": {
"description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
"type": "int, optional",
"default": "0"
},
"count": {
"description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
"type": "int, optional",
"default": "20"
}
}
}
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
response_text = [
{
"name": "comment_list",
"arguments": {
"video_id": 456789123,
"count": 15
}
}
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
# Inference only: no gradients are needed while generating.
with torch.no_grad():
    # Scores are requested because parse_output reads output.scores.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
safe_token = "No"
unsafe_token = "Yes"
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a risk label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens. Everything after this position
            in ``output.sequences`` is decoded as the model's answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded answer matches
        neither. ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs``
        is not positive and therefore no probability was computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one
        # (presumably the end-of-sequence step -- TODO confirm).
        top_k_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 is the unsafe ("risk") entry returned by get_probabilities.
        prob_of_risk = get_probabilities(top_k_per_step)[1].item()

    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    if answer.lower() == unsafe_token.lower():
        label = unsafe_token
    elif answer.lower() == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    # Previously ``prob_of_risk.item()`` was called unconditionally, which
    # raised AttributeError whenever the probability branch was skipped.
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Return the normalised safe/unsafe probabilities for a generation.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generation
            step. Only row ``[0]`` of each result is read (assumes a batch
            of one -- TODO confirm). The values are exponentiated as if
            they were log-probabilities.

    Returns:
        A 1-D tensor ``[p_safe, p_unsafe]`` produced by a softmax over the
        two accumulated log-masses.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a token never shows up.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for value, token_id in zip(step_values, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_key:
                safe_mass += math.exp(value)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(value)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
model_path,
device_map="auto",
torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
# Inference only: no gradients are needed while generating.
with torch.no_grad():
    # Scores are requested because parse_output reads output.scores.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
# Inference only: no gradients are needed while generating.
with torch.no_grad():
    # Scores are requested because parse_output reads output.scores.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
{
"name": "comment_list",
"description": "Fetches a list of comments for a specified IBM video using the given API.",
"parameters": {
"aweme_id": {
"description": "The ID of the IBM video.",
"type": "int",
"default": "7178094165614464282"
},
"cursor": {
"description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
"type": "int, optional",
"default": "0"
},
"count": {
"description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
"type": "int, optional",
"default": "20"
}
}
}
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
response_text = [
{
"name": "comment_list",
"arguments": {
"video_id": 456789123,
"count": 15
}
}
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
# Inference only: no gradients are needed while generating.
with torch.no_grad():
    # Scores are requested because parse_output reads output.scores.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
safe_token = "No"
unsafe_token = "Yes"
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a risk label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens. Everything after this position
            in ``output.sequences`` is decoded as the model's answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded answer matches
        neither. ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs``
        is not positive and therefore no probability was computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one
        # (presumably the end-of-sequence step -- TODO confirm).
        top_k_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 is the unsafe ("risk") entry returned by get_probabilities.
        prob_of_risk = get_probabilities(top_k_per_step)[1].item()

    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    if answer.lower() == unsafe_token.lower():
        label = unsafe_token
    elif answer.lower() == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    # Previously ``prob_of_risk.item()`` was called unconditionally, which
    # raised AttributeError whenever the probability branch was skipped.
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Compute the relative probability of the safe vs. unsafe answer token.

    Args:
        logprobs: Iterable of ``torch.topk`` results (``values``/``indices``),
            one per generated step; only the first row of each is inspected.

    Returns:
        A 2-element tensor ``[p_safe, p_unsafe]`` obtained by a softmax over
        the log of the accumulated masses.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a token never shows up in the top-k.
    mass_safe = 1e-50
    mass_unsafe = 1e-50
    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_values, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_key:
                mass_safe += math.exp(score)
            if token_text == unsafe_key:
                mass_unsafe += math.exp(score)
    log_masses = torch.tensor([math.log(mass_safe), math.log(mass_unsafe)])
    return torch.softmax(log_masses, dim=0)
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
model_path,
device_map="auto",
torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
{
"name": "comment_list",
"description": "Fetches a list of comments for a specified IBM video using the given API.",
"parameters": {
"aweme_id": {
"description": "The ID of the IBM video.",
"type": "int",
"default": "7178094165614464282"
},
"cursor": {
"description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
"type": "int, optional",
"default": "0"
},
"count": {
"description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
"type": "int, optional",
"default": "20"
}
}
}
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
response_text = [
{
"name": "comment_list",
"arguments": {
"video_id": 456789123,
"count": 15
}
}
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
safe_token = "No"
unsafe_token = "Yes"
nlogprobs = 20
def parse_output(output, input_len):
    """Derive the guardian verdict and risk probability from a generation result.

    Args:
        output: Result of ``model.generate(..., return_dict_in_generate=True,
            output_scores=True)``; its ``sequences`` and ``scores`` are read.
        input_len: Number of prompt tokens; everything after it in
            ``output.sequences`` is decoded as the model's answer.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``, ``safe_token``
        or ``"Failed"`` when the decoded answer matches neither
        (case-insensitively). ``prob_of_risk`` is a float, or ``None`` when
        ``nlogprobs`` is not positive and no probability was computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Keep the top-k candidates of every generation step except the last
        # one (presumably the end-of-sequence step -- TODO confirm).
        top_k_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of the returned pair is the unsafe ("risk") probability.
        prob_of_risk = get_probabilities(top_k_per_step)[1].item()

    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip().lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    # The original called .item() unconditionally and raised AttributeError on
    # None whenever nlogprobs <= 0; return None in that case instead.
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Compute the relative probability of the safe vs. unsafe answer token.

    Args:
        logprobs: Iterable of ``torch.topk`` results (``values``/``indices``),
            one per generated step; only the first row of each is inspected.

    Returns:
        A 2-element tensor ``[p_safe, p_unsafe]`` obtained by a softmax over
        the log of the accumulated masses.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a token never shows up in the top-k.
    mass_safe = 1e-50
    mass_unsafe = 1e-50
    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_values, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_key:
                mass_safe += math.exp(score)
            if token_text == unsafe_key:
                mass_unsafe += math.exp(score)
    log_masses = torch.tensor([math.log(mass_safe), math.log(mass_unsafe)])
    return torch.softmax(log_masses, dim=0)
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
model_path,
device_map="auto",
torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
{
"name": "comment_list",
"description": "Fetches a list of comments for a specified IBM video using the given API.",
"parameters": {
"aweme_id": {
"description": "The ID of the IBM video.",
"type": "int",
"default": "7178094165614464282"
},
"cursor": {
"description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
"type": "int, optional",
"default": "0"
},
"count": {
"description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
"type": "int, optional",
"default": "20"
}
}
}
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
response_text = [
{
"name": "comment_list",
"arguments": {
"video_id": 456789123,
"count": 15
}
}
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
safe_token = "No"
unsafe_token = "Yes"
nlogprobs = 20
def parse_output(output, input_len):
    """Derive the guardian verdict and risk probability from a generation result.

    Args:
        output: Result of ``model.generate(..., return_dict_in_generate=True,
            output_scores=True)``; its ``sequences`` and ``scores`` are read.
        input_len: Number of prompt tokens; everything after it in
            ``output.sequences`` is decoded as the model's answer.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``, ``safe_token``
        or ``"Failed"`` when the decoded answer matches neither
        (case-insensitively). ``prob_of_risk`` is a float, or ``None`` when
        ``nlogprobs`` is not positive and no probability was computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Keep the top-k candidates of every generation step except the last
        # one (presumably the end-of-sequence step -- TODO confirm).
        top_k_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of the returned pair is the unsafe ("risk") probability.
        prob_of_risk = get_probabilities(top_k_per_step)[1].item()

    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip().lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    # The original called .item() unconditionally and raised AttributeError on
    # None whenever nlogprobs <= 0; return None in that case instead.
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Compute the relative probability of the safe vs. unsafe answer token.

    Args:
        logprobs: Iterable of ``torch.topk`` results (``values``/``indices``),
            one per generated step; only the first row of each is inspected.

    Returns:
        A 2-element tensor ``[p_safe, p_unsafe]`` obtained by a softmax over
        the log of the accumulated masses.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a token never shows up in the top-k.
    mass_safe = 1e-50
    mass_unsafe = 1e-50
    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_values, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_key:
                mass_safe += math.exp(score)
            if token_text == unsafe_key:
                mass_unsafe += math.exp(score)
    log_masses = torch.tensor([math.log(mass_safe), math.log(mass_unsafe)])
    return torch.softmax(log_masses, dim=0)
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
model_path,
device_map="auto",
torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
{
"name": "comment_list",
"description": "Fetches a list of comments for a specified IBM video using the given API.",
"parameters": {
"aweme_id": {
"description": "The ID of the IBM video.",
"type": "int",
"default": "7178094165614464282"
},
"cursor": {
"description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
"type": "int, optional",
"default": "0"
},
"count": {
"description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
"type": "int, optional",
"default": "20"
}
}
}
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
response_text = [
{
"name": "comment_list",
"arguments": {
"video_id": 456789123,
"count": 15
}
}
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
safe_token = "No"
unsafe_token = "Yes"
nlogprobs = 20
def parse_output(output, input_len):
    """Derive the guardian verdict and risk probability from a generation result.

    Args:
        output: Result of ``model.generate(..., return_dict_in_generate=True,
            output_scores=True)``; its ``sequences`` and ``scores`` are read.
        input_len: Number of prompt tokens; everything after it in
            ``output.sequences`` is decoded as the model's answer.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``, ``safe_token``
        or ``"Failed"`` when the decoded answer matches neither
        (case-insensitively). ``prob_of_risk`` is a float, or ``None`` when
        ``nlogprobs`` is not positive and no probability was computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Keep the top-k candidates of every generation step except the last
        # one (presumably the end-of-sequence step -- TODO confirm).
        top_k_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of the returned pair is the unsafe ("risk") probability.
        prob_of_risk = get_probabilities(top_k_per_step)[1].item()

    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip().lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    # The original called .item() unconditionally and raised AttributeError on
    # None whenever nlogprobs <= 0; return None in that case instead.
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Compute the relative probability of the safe vs. unsafe answer token.

    Args:
        logprobs: Iterable of ``torch.topk`` results (``values``/``indices``),
            one per generated step; only the first row of each is inspected.

    Returns:
        A 2-element tensor ``[p_safe, p_unsafe]`` obtained by a softmax over
        the log of the accumulated masses.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a token never shows up in the top-k.
    mass_safe = 1e-50
    mass_unsafe = 1e-50
    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_values, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_key:
                mass_safe += math.exp(score)
            if token_text == unsafe_key:
                mass_unsafe += math.exp(score)
    log_masses = torch.tensor([math.log(mass_safe), math.log(mass_unsafe)])
    return torch.softmax(log_masses, dim=0)
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
model_path,
device_map="auto",
torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
{
"name": "comment_list",
"description": "Fetches a list of comments for a specified IBM video using the given API.",
"parameters": {
"aweme_id": {
"description": "The ID of the IBM video.",
"type": "int",
"default": "7178094165614464282"
},
"cursor": {
"description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
"type": "int, optional",
"default": "0"
},
"count": {
"description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
"type": "int, optional",
"default": "20"
}
}
}
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
response_text = [
{
"name": "comment_list",
"arguments": {
"video_id": 456789123,
"count": 15
}
}
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
safe_token = "No"
unsafe_token = "Yes"
nlogprobs = 20
def parse_output(output, input_len):
    """Derive the guardian verdict and risk probability from a generation result.

    Args:
        output: Result of ``model.generate(..., return_dict_in_generate=True,
            output_scores=True)``; its ``sequences`` and ``scores`` are read.
        input_len: Number of prompt tokens; everything after it in
            ``output.sequences`` is decoded as the model's answer.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``, ``safe_token``
        or ``"Failed"`` when the decoded answer matches neither
        (case-insensitively). ``prob_of_risk`` is a float, or ``None`` when
        ``nlogprobs`` is not positive and no probability was computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Keep the top-k candidates of every generation step except the last
        # one (presumably the end-of-sequence step -- TODO confirm).
        top_k_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of the returned pair is the unsafe ("risk") probability.
        prob_of_risk = get_probabilities(top_k_per_step)[1].item()

    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip().lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    # The original called .item() unconditionally and raised AttributeError on
    # None whenever nlogprobs <= 0; return None in that case instead.
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Compute the relative probability of the safe vs. unsafe answer token.

    Args:
        logprobs: Iterable of ``torch.topk`` results (``values``/``indices``),
            one per generated step; only the first row of each is inspected.

    Returns:
        A 2-element tensor ``[p_safe, p_unsafe]`` obtained by a softmax over
        the log of the accumulated masses.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a token never shows up in the top-k.
    mass_safe = 1e-50
    mass_unsafe = 1e-50
    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_values, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_key:
                mass_safe += math.exp(score)
            if token_text == unsafe_key:
                mass_unsafe += math.exp(score)
    log_masses = torch.tensor([math.log(mass_safe), math.log(mass_unsafe)])
    return torch.softmax(log_masses, dim=0)
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
model_path,
device_map="auto",
torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
{
"name": "comment_list",
"description": "Fetches a list of comments for a specified IBM video using the given API.",
"parameters": {
"aweme_id": {
"description": "The ID of the IBM video.",
"type": "int",
"default": "7178094165614464282"
},
"cursor": {
"description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
"type": "int, optional",
"default": "0"
},
"count": {
"description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
"type": "int, optional",
"default": "20"
}
}
}
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
response_text = [
{
"name": "comment_list",
"arguments": {
"video_id": 456789123,
"count": 15
}
}
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
safe_token = "No"
unsafe_token = "Yes"
nlogprobs = 20
def parse_output(output, input_len):
    """Derive the guardian verdict and risk probability from a generation result.

    Args:
        output: Result of ``model.generate(..., return_dict_in_generate=True,
            output_scores=True)``; its ``sequences`` and ``scores`` are read.
        input_len: Number of prompt tokens; everything after it in
            ``output.sequences`` is decoded as the model's answer.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``, ``safe_token``
        or ``"Failed"`` when the decoded answer matches neither
        (case-insensitively). ``prob_of_risk`` is a float, or ``None`` when
        ``nlogprobs`` is not positive and no probability was computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Keep the top-k candidates of every generation step except the last
        # one (presumably the end-of-sequence step -- TODO confirm).
        top_k_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of the returned pair is the unsafe ("risk") probability.
        prob_of_risk = get_probabilities(top_k_per_step)[1].item()

    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip().lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    # The original called .item() unconditionally and raised AttributeError on
    # None whenever nlogprobs <= 0; return None in that case instead.
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate safe/unsafe probabilities from per-step top-k candidates.

    Args:
        logprobs: Iterable of ``torch.topk`` results (objects exposing
            ``values`` and ``indices``); only row 0 of each is read.

    Returns:
        A 2-element tensor ordered ``[safe, unsafe]``, normalised with
        softmax over the log of the accumulated masses.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floors keep math.log() defined when a token never shows up.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_key:
                safe_mass += math.exp(score)
            if candidate == unsafe_key:
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Identifier handed to both from_pretrained calls below.
model_path = "ibm-granite/granite-guardian-3.1-8b"

# Weights are requested in bfloat16; device placement is left to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="auto", torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}

# Render the conversation with the guardian chat template and move it to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
# Greedy decoding; per-step scores are kept so parse_output can estimate a probability.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives Mekas's birth date as the screening date, which the context does not support.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}

# Render the conversation with the guardian chat template and move it to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
# Greedy decoding; per-step scores are kept so parse_output can estimate a probability.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes "video_id", which the schema above does not declare (it declares "aweme_id").
response_text = [
    {
        "name": "comment_list",
        "arguments": {"video_id": 456789123, "count": 15},
    }
]

messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}

# Render the conversation with the guardian chat template and move it to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
# Greedy decoding; per-step scores are kept so parse_output can estimate a probability.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings: parse_output compares the decoded answer against these
# (case-insensitively) and get_probabilities looks for them among candidates.
safe_token = "No"
unsafe_token = "Yes"
# `k` passed to torch.topk for every generation step; parse_output only
# computes a risk probability when this is greater than 0.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Reads the module-level ``nlogprobs``, ``tokenizer``, ``safe_token`` and
    ``unsafe_token``.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens; only tokens after this offset in
            ``output.sequences`` are decoded as the answer.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token`` or ``"Failed"`` when the decoded answer matches
        neither. ``prob_of_risk`` is a Python float, or ``None`` when
        ``nlogprobs`` is not positive (previously this case crashed on
        ``None.item()``).
    """
    prob_of_risk = None

    if nlogprobs > 0:
        # The final entry of output.scores is left out, as in the original
        # snippet (presumably the end-of-sequence step -- TODO confirm).
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in output.scores[:-1]
        ]
        # Index 1 is the unsafe-token probability (see get_probabilities).
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip().lower()

    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate safe/unsafe probabilities from per-step top-k candidates.

    Args:
        logprobs: Iterable of ``torch.topk`` results (objects exposing
            ``values`` and ``indices``); only row 0 of each is read.

    Returns:
        A 2-element tensor ordered ``[safe, unsafe]``, normalised with
        softmax over the log of the accumulated masses.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floors keep math.log() defined when a token never shows up.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_key:
                safe_mass += math.exp(score)
            if candidate == unsafe_key:
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Identifier handed to both from_pretrained calls below.
model_path = "ibm-granite/granite-guardian-3.1-8b"

# Weights are requested in bfloat16; device placement is left to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="auto", torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}

# Render the conversation with the guardian chat template and move it to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
# Greedy decoding; per-step scores are kept so parse_output can estimate a probability.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives Mekas's birth date as the screening date, which the context does not support.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}

# Render the conversation with the guardian chat template and move it to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
# Greedy decoding; per-step scores are kept so parse_output can estimate a probability.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes "video_id", which the schema above does not declare (it declares "aweme_id").
response_text = [
    {
        "name": "comment_list",
        "arguments": {"video_id": 456789123, "count": 15},
    }
]

messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}

# Render the conversation with the guardian chat template and move it to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
# Greedy decoding; per-step scores are kept so parse_output can estimate a probability.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings: parse_output compares the decoded answer against these
# (case-insensitively) and get_probabilities looks for them among candidates.
safe_token = "No"
unsafe_token = "Yes"
# `k` passed to torch.topk for every generation step; parse_output only
# computes a risk probability when this is greater than 0.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Reads the module-level ``nlogprobs``, ``tokenizer``, ``safe_token`` and
    ``unsafe_token``.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens; only tokens after this offset in
            ``output.sequences`` are decoded as the answer.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token`` or ``"Failed"`` when the decoded answer matches
        neither. ``prob_of_risk`` is a Python float, or ``None`` when
        ``nlogprobs`` is not positive (previously this case crashed on
        ``None.item()``).
    """
    prob_of_risk = None

    if nlogprobs > 0:
        # The final entry of output.scores is left out, as in the original
        # snippet (presumably the end-of-sequence step -- TODO confirm).
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in output.scores[:-1]
        ]
        # Index 1 is the unsafe-token probability (see get_probabilities).
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip().lower()

    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate safe/unsafe probabilities from per-step top-k candidates.

    Args:
        logprobs: Iterable of ``torch.topk`` results (objects exposing
            ``values`` and ``indices``); only row 0 of each is read.

    Returns:
        A 2-element tensor ordered ``[safe, unsafe]``, normalised with
        softmax over the log of the accumulated masses.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floors keep math.log() defined when a token never shows up.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_key:
                safe_mass += math.exp(score)
            if candidate == unsafe_key:
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Identifier handed to both from_pretrained calls below.
model_path = "ibm-granite/granite-guardian-3.1-8b"

# Weights are requested in bfloat16; device placement is left to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="auto", torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}

# Render the conversation with the guardian chat template and move it to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
# Greedy decoding; per-step scores are kept so parse_output can estimate a probability.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives Mekas's birth date as the screening date, which the context does not support.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}

# Render the conversation with the guardian chat template and move it to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
# Greedy decoding; per-step scores are kept so parse_output can estimate a probability.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes "video_id", which the schema above does not declare (it declares "aweme_id").
response_text = [
    {
        "name": "comment_list",
        "arguments": {"video_id": 456789123, "count": 15},
    }
]

messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}

# Render the conversation with the guardian chat template and move it to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
# Greedy decoding; per-step scores are kept so parse_output can estimate a probability.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings: parse_output compares the decoded answer against these
# (case-insensitively) and get_probabilities looks for them among candidates.
safe_token = "No"
unsafe_token = "Yes"
# `k` passed to torch.topk for every generation step; parse_output only
# computes a risk probability when this is greater than 0.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Reads the module-level ``nlogprobs``, ``tokenizer``, ``safe_token`` and
    ``unsafe_token``.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens; only tokens after this offset in
            ``output.sequences`` are decoded as the answer.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token`` or ``"Failed"`` when the decoded answer matches
        neither. ``prob_of_risk`` is a Python float, or ``None`` when
        ``nlogprobs`` is not positive (previously this case crashed on
        ``None.item()``).
    """
    prob_of_risk = None

    if nlogprobs > 0:
        # The final entry of output.scores is left out, as in the original
        # snippet (presumably the end-of-sequence step -- TODO confirm).
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in output.scores[:-1]
        ]
        # Index 1 is the unsafe-token probability (see get_probabilities).
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip().lower()

    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate safe/unsafe probabilities from per-step top-k candidates.

    Args:
        logprobs: Iterable of ``torch.topk`` results (objects exposing
            ``values`` and ``indices``); only row 0 of each is read.

    Returns:
        A 2-element tensor ordered ``[safe, unsafe]``, normalised with
        softmax over the log of the accumulated masses.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floors keep math.log() defined when a token never shows up.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_key:
                safe_mass += math.exp(score)
            if candidate == unsafe_key:
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Identifier handed to both from_pretrained calls below.
model_path = "ibm-granite/granite-guardian-3.1-8b"

# Weights are requested in bfloat16; device placement is left to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="auto", torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}

# Render the conversation with the guardian chat template and move it to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
# Greedy decoding; per-step scores are kept so parse_output can estimate a probability.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives Mekas's birth date as the screening date, which the context does not support.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}

# Render the conversation with the guardian chat template and move it to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
# Greedy decoding; per-step scores are kept so parse_output can estimate a probability.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes "video_id", which the schema above does not declare (it declares "aweme_id").
response_text = [
    {
        "name": "comment_list",
        "arguments": {"video_id": 456789123, "count": 15},
    }
]

messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}

# Render the conversation with the guardian chat template and move it to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
# Greedy decoding; per-step scores are kept so parse_output can estimate a probability.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings: parse_output compares the decoded answer against these
# (case-insensitively) and get_probabilities looks for them among candidates.
safe_token = "No"
unsafe_token = "Yes"
# `k` passed to torch.topk for every generation step; parse_output only
# computes a risk probability when this is greater than 0.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Reads the module-level ``nlogprobs``, ``tokenizer``, ``safe_token`` and
    ``unsafe_token``.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens; only tokens after this offset in
            ``output.sequences`` are decoded as the answer.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token`` or ``"Failed"`` when the decoded answer matches
        neither. ``prob_of_risk`` is a Python float, or ``None`` when
        ``nlogprobs`` is not positive (previously this case crashed on
        ``None.item()``).
    """
    prob_of_risk = None

    if nlogprobs > 0:
        # The final entry of output.scores is left out, as in the original
        # snippet (presumably the end-of-sequence step -- TODO confirm).
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in output.scores[:-1]
        ]
        # Index 1 is the unsafe-token probability (see get_probabilities).
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip().lower()

    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate safe/unsafe probabilities from per-step top-k candidates.

    Args:
        logprobs: Iterable of ``torch.topk`` results (objects exposing
            ``values`` and ``indices``); only row 0 of each is read.

    Returns:
        A 2-element tensor ordered ``[safe, unsafe]``, normalised with
        softmax over the log of the accumulated masses.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floors keep math.log() defined when a token never shows up.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_key:
                safe_mass += math.exp(score)
            if candidate == unsafe_key:
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Identifier handed to both from_pretrained calls below.
model_path = "ibm-granite/granite-guardian-3.1-8b"

# Weights are requested in bfloat16; device placement is left to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="auto", torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}

# Render the conversation with the guardian chat template and move it to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
# Greedy decoding; per-step scores are kept so parse_output can estimate a probability.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives Mekas's birth date as the screening date, which the context does not support.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}

# Render the conversation with the guardian chat template and move it to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
# Greedy decoding; per-step scores are kept so parse_output can estimate a probability.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes "video_id", which the schema above does not declare (it declares "aweme_id").
response_text = [
    {
        "name": "comment_list",
        "arguments": {"video_id": 456789123, "count": 15},
    }
]

messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}

# Render the conversation with the guardian chat template and move it to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
# Greedy decoding; per-step scores are kept so parse_output can estimate a probability.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings: parse_output compares the decoded answer against these
# (case-insensitively) and get_probabilities looks for them among candidates.
safe_token = "No"
unsafe_token = "Yes"
# `k` passed to torch.topk for every generation step; parse_output only
# computes a risk probability when this is greater than 0.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a verdict label and a risk probability.

    Reads the module-level ``nlogprobs``, ``tokenizer``, ``safe_token`` and
    ``unsafe_token``.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens; only tokens after this offset in
            ``output.sequences`` are decoded as the answer.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token`` or ``"Failed"`` when the decoded answer matches
        neither. ``prob_of_risk`` is a Python float, or ``None`` when
        ``nlogprobs`` is not positive (previously this case crashed on
        ``None.item()``).
    """
    prob_of_risk = None

    if nlogprobs > 0:
        # The final entry of output.scores is left out, as in the original
        # snippet (presumably the end-of-sequence step -- TODO confirm).
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in output.scores[:-1]
        ]
        # Index 1 is the unsafe-token probability (see get_probabilities).
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip().lower()

    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate `[P(safe), P(unsafe)]` from per-step top-k results.

    Args:
        logprobs: Iterable of `torch.topk` results (objects exposing `.values`
            and `.indices`); only row 0 of each is read.

    Returns:
        A 2-element tensor `[safe, unsafe]`: softmax over the logs of the two
        accumulated masses.
    """
    # Tiny non-zero floors keep math.log defined if a token never appears.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        scores_row = step.values.tolist()[0]
        ids_row = step.indices.tolist()[0]
        for score, token_id in zip(scores_row, ids_row):
            token_text = tokenizer.convert_ids_to_tokens(token_id)
            if token_text.strip().lower() == safe_token.lower():
                safe_mass += math.exp(score)
            if token_text.strip().lower() == unsafe_token.lower():
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Load the guardian model in bfloat16 with automatic device placement.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    # Greedy decoding; per-step scores are requested because parse_output reads them.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives Mekas's birth date as the screening date, which the context does not support.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    # Greedy decoding; per-step scores are requested because parse_output reads them.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE: the call uses `video_id`, which is not among the declared parameters
# (`aweme_id`, `cursor`, `count`); the expected verdict noted below is "Yes".
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    # Greedy decoding; per-step scores are requested because parse_output reads them.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings compared (case-insensitively) against the model's decoded answer.
safe_token = "No"
unsafe_token = "Yes"
# k passed to torch.topk for every generated step when scoring the verdict.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a `generate` result into a `(label, prob_of_risk)` pair.

    Args:
        output: Result of `model.generate(..., return_dict_in_generate=True,
            output_scores=True)`; its `scores` and `sequences` are read.
        input_len: Number of prompt tokens; tokens after this offset in
            `output.sequences` are decoded as the answer.

    Returns:
        `(label, prob_of_risk)`. `label` is `unsafe_token`, `safe_token`, or
        "Failed" when the decoded text matches neither. `prob_of_risk` is a
        float, or None when `nlogprobs` is not positive (previously this case
        crashed on `None.item()`).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The final score step is excluded, as in the original snippet
        # (presumably the end-of-sequence step -- TODO confirm).
        top_k_per_step = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        prob_of_risk = get_probabilities(top_k_per_step)[1].item()

    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate `[P(safe), P(unsafe)]` from per-step top-k results.

    Args:
        logprobs: Iterable of `torch.topk` results (objects exposing `.values`
            and `.indices`); only row 0 of each is read.

    Returns:
        A 2-element tensor `[safe, unsafe]`: softmax over the logs of the two
        accumulated masses.
    """
    # Tiny non-zero floors keep math.log defined if a token never appears.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        scores_row = step.values.tolist()[0]
        ids_row = step.indices.tolist()[0]
        for score, token_id in zip(scores_row, ids_row):
            token_text = tokenizer.convert_ids_to_tokens(token_id)
            if token_text.strip().lower() == safe_token.lower():
                safe_mass += math.exp(score)
            if token_text.strip().lower() == unsafe_token.lower():
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Load the guardian model in bfloat16 with automatic device placement.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    # Greedy decoding; per-step scores are requested because parse_output reads them.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives Mekas's birth date as the screening date, which the context does not support.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    # Greedy decoding; per-step scores are requested because parse_output reads them.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE: the call uses `video_id`, which is not among the declared parameters
# (`aweme_id`, `cursor`, `count`); the expected verdict noted below is "Yes".
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    # Greedy decoding; per-step scores are requested because parse_output reads them.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings compared (case-insensitively) against the model's decoded answer.
safe_token = "No"
unsafe_token = "Yes"
# k passed to torch.topk for every generated step when scoring the verdict.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a `generate` result into a `(label, prob_of_risk)` pair.

    Args:
        output: Result of `model.generate(..., return_dict_in_generate=True,
            output_scores=True)`; its `scores` and `sequences` are read.
        input_len: Number of prompt tokens; tokens after this offset in
            `output.sequences` are decoded as the answer.

    Returns:
        `(label, prob_of_risk)`. `label` is `unsafe_token`, `safe_token`, or
        "Failed" when the decoded text matches neither. `prob_of_risk` is a
        float, or None when `nlogprobs` is not positive (previously this case
        crashed on `None.item()`).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The final score step is excluded, as in the original snippet
        # (presumably the end-of-sequence step -- TODO confirm).
        top_k_per_step = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        prob_of_risk = get_probabilities(top_k_per_step)[1].item()

    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate `[P(safe), P(unsafe)]` from per-step top-k results.

    Args:
        logprobs: Iterable of `torch.topk` results (objects exposing `.values`
            and `.indices`); only row 0 of each is read.

    Returns:
        A 2-element tensor `[safe, unsafe]`: softmax over the logs of the two
        accumulated masses.
    """
    # Tiny non-zero floors keep math.log defined if a token never appears.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        scores_row = step.values.tolist()[0]
        ids_row = step.indices.tolist()[0]
        for score, token_id in zip(scores_row, ids_row):
            token_text = tokenizer.convert_ids_to_tokens(token_id)
            if token_text.strip().lower() == safe_token.lower():
                safe_mass += math.exp(score)
            if token_text.strip().lower() == unsafe_token.lower():
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Load the guardian model in bfloat16 with automatic device placement.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    # Greedy decoding; per-step scores are requested because parse_output reads them.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives Mekas's birth date as the screening date, which the context does not support.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    # Greedy decoding; per-step scores are requested because parse_output reads them.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE: the call uses `video_id`, which is not among the declared parameters
# (`aweme_id`, `cursor`, `count`); the expected verdict noted below is "Yes".
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    # Greedy decoding; per-step scores are requested because parse_output reads them.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings compared (case-insensitively) against the model's decoded answer.
safe_token = "No"
unsafe_token = "Yes"
# k passed to torch.topk for every generated step when scoring the verdict.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a `generate` result into a `(label, prob_of_risk)` pair.

    Args:
        output: Result of `model.generate(..., return_dict_in_generate=True,
            output_scores=True)`; its `scores` and `sequences` are read.
        input_len: Number of prompt tokens; tokens after this offset in
            `output.sequences` are decoded as the answer.

    Returns:
        `(label, prob_of_risk)`. `label` is `unsafe_token`, `safe_token`, or
        "Failed" when the decoded text matches neither. `prob_of_risk` is a
        float, or None when `nlogprobs` is not positive (previously this case
        crashed on `None.item()`).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The final score step is excluded, as in the original snippet
        # (presumably the end-of-sequence step -- TODO confirm).
        top_k_per_step = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        prob_of_risk = get_probabilities(top_k_per_step)[1].item()

    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate `[P(safe), P(unsafe)]` from per-step top-k results.

    Args:
        logprobs: Iterable of `torch.topk` results (objects exposing `.values`
            and `.indices`); only row 0 of each is read.

    Returns:
        A 2-element tensor `[safe, unsafe]`: softmax over the logs of the two
        accumulated masses.
    """
    # Tiny non-zero floors keep math.log defined if a token never appears.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        scores_row = step.values.tolist()[0]
        ids_row = step.indices.tolist()[0]
        for score, token_id in zip(scores_row, ids_row):
            token_text = tokenizer.convert_ids_to_tokens(token_id)
            if token_text.strip().lower() == safe_token.lower():
                safe_mass += math.exp(score)
            if token_text.strip().lower() == unsafe_token.lower():
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Load the guardian model in bfloat16 with automatic device placement.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    # Greedy decoding; per-step scores are requested because parse_output reads them.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives Mekas's birth date as the screening date, which the context does not support.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    # Greedy decoding; per-step scores are requested because parse_output reads them.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE: the call uses `video_id`, which is not among the declared parameters
# (`aweme_id`, `cursor`, `count`); the expected verdict noted below is "Yes".
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    # Greedy decoding; per-step scores are requested because parse_output reads them.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings compared (case-insensitively) against the model's decoded answer.
safe_token = "No"
unsafe_token = "Yes"
# k passed to torch.topk for every generated step when scoring the verdict.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a `generate` result into a `(label, prob_of_risk)` pair.

    Args:
        output: Result of `model.generate(..., return_dict_in_generate=True,
            output_scores=True)`; its `scores` and `sequences` are read.
        input_len: Number of prompt tokens; tokens after this offset in
            `output.sequences` are decoded as the answer.

    Returns:
        `(label, prob_of_risk)`. `label` is `unsafe_token`, `safe_token`, or
        "Failed" when the decoded text matches neither. `prob_of_risk` is a
        float, or None when `nlogprobs` is not positive (previously this case
        crashed on `None.item()`).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The final score step is excluded, as in the original snippet
        # (presumably the end-of-sequence step -- TODO confirm).
        top_k_per_step = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        prob_of_risk = get_probabilities(top_k_per_step)[1].item()

    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate `[P(safe), P(unsafe)]` from per-step top-k results.

    Args:
        logprobs: Iterable of `torch.topk` results (objects exposing `.values`
            and `.indices`); only row 0 of each is read.

    Returns:
        A 2-element tensor `[safe, unsafe]`: softmax over the logs of the two
        accumulated masses.
    """
    # Tiny non-zero floors keep math.log defined if a token never appears.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        scores_row = step.values.tolist()[0]
        ids_row = step.indices.tolist()[0]
        for score, token_id in zip(scores_row, ids_row):
            token_text = tokenizer.convert_ids_to_tokens(token_id)
            if token_text.strip().lower() == safe_token.lower():
                safe_mass += math.exp(score)
            if token_text.strip().lower() == unsafe_token.lower():
                unsafe_mass += math.exp(score)

    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Load the guardian model in bfloat16 with automatic device placement.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    # Greedy decoding; per-step scores are requested because parse_output reads them.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives Mekas's birth date as the screening date, which the context does not support.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    # Greedy decoding; per-step scores are requested because parse_output reads them.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE: the call uses `video_id`, which is not among the declared parameters
# (`aweme_id`, `cursor`, `count`); the expected verdict noted below is "Yes".
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    # Greedy decoding; per-step scores are requested because parse_output reads them.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings compared (case-insensitively) against the model's decoded answer.
safe_token = "No"
unsafe_token = "Yes"
# k passed to torch.topk for every generated step when scoring the verdict.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a `generate` result into a `(label, prob_of_risk)` pair.

    Args:
        output: Result of `model.generate(..., return_dict_in_generate=True,
            output_scores=True)`; its `scores` and `sequences` are read.
        input_len: Number of prompt tokens; tokens after this offset in
            `output.sequences` are decoded as the answer.

    Returns:
        `(label, prob_of_risk)`. `label` is `unsafe_token`, `safe_token`, or
        "Failed" when the decoded text matches neither. `prob_of_risk` is a
        float, or None when `nlogprobs` is not positive (previously this case
        crashed on `None.item()`).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The final score step is excluded, as in the original snippet
        # (presumably the end-of-sequence step -- TODO confirm).
        top_k_per_step = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        prob_of_risk = get_probabilities(top_k_per_step)[1].item()

    res = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Collapse per-step top-k scores into a normalised [safe, unsafe] pair.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generation step;
            row 0 of each ``values`` / ``indices`` tensor is read.

    Returns:
        1-D tensor ``[p_safe, p_unsafe]`` that sums to 1.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()

    # Start from a tiny positive mass so math.log() below never sees zero.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50

    for step_topk in logprobs:
        step_scores = step_topk.values.tolist()[0]
        step_ids = step_topk.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            # Two independent checks on purpose: a token may count for both
            # sides if the two configured tokens happen to be equal.
            if token_text == safe_key:
                safe_token_prob += math.exp(score)
            if token_text == unsafe_key:
                unsafe_token_prob += math.exp(score)

    # softmax over the logs is the same as dividing each mass by their sum.
    log_masses = torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint identifier handed to both from_pretrained() calls below.
model_path = "ibm-granite/granite-guardian-3.1-8b"

# Weights are requested in bfloat16; device_map="auto" leaves placement to the library.
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto", torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

# Conversation under review: the user prompt, then the assistant answer to be judged.
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]

# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}

input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output() slices the prompt off with it

model.eval()
with torch.no_grad():
    # Sampling is off; per-step scores are requested because parse_output() reads output.scores.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The answer gives the screening date as December 24, 1922, while the context says July 16, 1964.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

# The retrieved passage goes in under the "context" role, followed by the answer to check.
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}

input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output() slices the prompt off with it

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call below passes "video_id", whereas the tool schema above declares "aweme_id".
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    }
]

# Tool definitions go in under the "tools" role, ahead of the user and assistant turns.
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}

input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output() slices the prompt off with it

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990

# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Generated answer that parse_output() reports as the "no risk" label.
safe_token = "No"
# Generated answer that parse_output() reports as the "risk detected" label.
unsafe_token = "Yes"
# How many top-scoring tokens per generation step are kept (k for torch.topk in parse_output()).
nlogprobs = 20
def parse_output(output, input_len):
    """Derive the Yes/No label and the risk probability from a ``generate`` result.

    Args:
        output: Result of ``model.generate(..., return_dict_in_generate=True,
            output_scores=True)``; its ``scores`` and ``sequences`` are read.
        input_len: Number of prompt tokens to slice off ``output.sequences``.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``, ``safe_token``
        or ``"Failed"`` when the decoded text matches neither. ``prob_of_risk``
        is a float, or ``None`` when ``nlogprobs`` is not positive (previously
        that case crashed on ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k scores of every generation step except the last one, which is
        # presumably the end-of-sequence step -- TODO confirm.
        topk_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # get_probabilities() returns [safe, unsafe]; index 1 is the risk.
        prob_of_risk = get_probabilities(topk_per_step)[1].item()

    # Decode only the newly generated tokens of the first sequence.
    generated_ids = output.sequences[:, input_len:][0]
    res = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Collapse per-step top-k scores into a normalised [safe, unsafe] pair.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generation step;
            row 0 of each ``values`` / ``indices`` tensor is read.

    Returns:
        1-D tensor ``[p_safe, p_unsafe]`` that sums to 1.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()

    # Start from a tiny positive mass so math.log() below never sees zero.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50

    for step_topk in logprobs:
        step_scores = step_topk.values.tolist()[0]
        step_ids = step_topk.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            # Two independent checks on purpose: a token may count for both
            # sides if the two configured tokens happen to be equal.
            if token_text == safe_key:
                safe_token_prob += math.exp(score)
            if token_text == unsafe_key:
                unsafe_token_prob += math.exp(score)

    # softmax over the logs is the same as dividing each mass by their sum.
    log_masses = torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint identifier handed to both from_pretrained() calls below.
model_path = "ibm-granite/granite-guardian-3.1-8b"

# Weights are requested in bfloat16; device_map="auto" leaves placement to the library.
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto", torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

# Conversation under review: the user prompt, then the assistant answer to be judged.
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]

# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}

input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output() slices the prompt off with it

model.eval()
with torch.no_grad():
    # Sampling is off; per-step scores are requested because parse_output() reads output.scores.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The answer gives the screening date as December 24, 1922, while the context says July 16, 1964.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

# The retrieved passage goes in under the "context" role, followed by the answer to check.
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}

input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output() slices the prompt off with it

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call below passes "video_id", whereas the tool schema above declares "aweme_id".
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    }
]

# Tool definitions go in under the "tools" role, ahead of the user and assistant turns.
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}

input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output() slices the prompt off with it

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990

# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Generated answer that parse_output() reports as the "no risk" label.
safe_token = "No"
# Generated answer that parse_output() reports as the "risk detected" label.
unsafe_token = "Yes"
# How many top-scoring tokens per generation step are kept (k for torch.topk in parse_output()).
nlogprobs = 20
def parse_output(output, input_len):
    """Derive the Yes/No label and the risk probability from a ``generate`` result.

    Args:
        output: Result of ``model.generate(..., return_dict_in_generate=True,
            output_scores=True)``; its ``scores`` and ``sequences`` are read.
        input_len: Number of prompt tokens to slice off ``output.sequences``.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``, ``safe_token``
        or ``"Failed"`` when the decoded text matches neither. ``prob_of_risk``
        is a float, or ``None`` when ``nlogprobs`` is not positive (previously
        that case crashed on ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k scores of every generation step except the last one, which is
        # presumably the end-of-sequence step -- TODO confirm.
        topk_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # get_probabilities() returns [safe, unsafe]; index 1 is the risk.
        prob_of_risk = get_probabilities(topk_per_step)[1].item()

    # Decode only the newly generated tokens of the first sequence.
    generated_ids = output.sequences[:, input_len:][0]
    res = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Collapse per-step top-k scores into a normalised [safe, unsafe] pair.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generation step;
            row 0 of each ``values`` / ``indices`` tensor is read.

    Returns:
        1-D tensor ``[p_safe, p_unsafe]`` that sums to 1.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()

    # Start from a tiny positive mass so math.log() below never sees zero.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50

    for step_topk in logprobs:
        step_scores = step_topk.values.tolist()[0]
        step_ids = step_topk.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            # Two independent checks on purpose: a token may count for both
            # sides if the two configured tokens happen to be equal.
            if token_text == safe_key:
                safe_token_prob += math.exp(score)
            if token_text == unsafe_key:
                unsafe_token_prob += math.exp(score)

    # softmax over the logs is the same as dividing each mass by their sum.
    log_masses = torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint identifier handed to both from_pretrained() calls below.
model_path = "ibm-granite/granite-guardian-3.1-8b"

# Weights are requested in bfloat16; device_map="auto" leaves placement to the library.
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto", torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

# Conversation under review: the user prompt, then the assistant answer to be judged.
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]

# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}

input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output() slices the prompt off with it

model.eval()
with torch.no_grad():
    # Sampling is off; per-step scores are requested because parse_output() reads output.scores.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The answer gives the screening date as December 24, 1922, while the context says July 16, 1964.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

# The retrieved passage goes in under the "context" role, followed by the answer to check.
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}

input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output() slices the prompt off with it

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call below passes "video_id", whereas the tool schema above declares "aweme_id".
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    }
]

# Tool definitions go in under the "tools" role, ahead of the user and assistant turns.
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}

input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output() slices the prompt off with it

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990

# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Generated answer that parse_output() reports as the "no risk" label.
safe_token = "No"
# Generated answer that parse_output() reports as the "risk detected" label.
unsafe_token = "Yes"
# How many top-scoring tokens per generation step are kept (k for torch.topk in parse_output()).
nlogprobs = 20
def parse_output(output, input_len):
    """Derive the Yes/No label and the risk probability from a ``generate`` result.

    Args:
        output: Result of ``model.generate(..., return_dict_in_generate=True,
            output_scores=True)``; its ``scores`` and ``sequences`` are read.
        input_len: Number of prompt tokens to slice off ``output.sequences``.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``, ``safe_token``
        or ``"Failed"`` when the decoded text matches neither. ``prob_of_risk``
        is a float, or ``None`` when ``nlogprobs`` is not positive (previously
        that case crashed on ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k scores of every generation step except the last one, which is
        # presumably the end-of-sequence step -- TODO confirm.
        topk_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # get_probabilities() returns [safe, unsafe]; index 1 is the risk.
        prob_of_risk = get_probabilities(topk_per_step)[1].item()

    # Decode only the newly generated tokens of the first sequence.
    generated_ids = output.sequences[:, input_len:][0]
    res = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Collapse per-step top-k scores into a normalised [safe, unsafe] pair.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generation step;
            row 0 of each ``values`` / ``indices`` tensor is read.

    Returns:
        1-D tensor ``[p_safe, p_unsafe]`` that sums to 1.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()

    # Start from a tiny positive mass so math.log() below never sees zero.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50

    for step_topk in logprobs:
        step_scores = step_topk.values.tolist()[0]
        step_ids = step_topk.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            # Two independent checks on purpose: a token may count for both
            # sides if the two configured tokens happen to be equal.
            if token_text == safe_key:
                safe_token_prob += math.exp(score)
            if token_text == unsafe_key:
                unsafe_token_prob += math.exp(score)

    # softmax over the logs is the same as dividing each mass by their sum.
    log_masses = torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint identifier handed to both from_pretrained() calls below.
model_path = "ibm-granite/granite-guardian-3.1-8b"

# Weights are requested in bfloat16; device_map="auto" leaves placement to the library.
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto", torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

# Conversation under review: the user prompt, then the assistant answer to be judged.
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]

# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}

input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output() slices the prompt off with it

model.eval()
with torch.no_grad():
    # Sampling is off; per-step scores are requested because parse_output() reads output.scores.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The answer gives the screening date as December 24, 1922, while the context says July 16, 1964.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

# The retrieved passage goes in under the "context" role, followed by the answer to check.
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}

input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output() slices the prompt off with it

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call below passes "video_id", whereas the tool schema above declares "aweme_id".
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    }
]

# Tool definitions go in under the "tools" role, ahead of the user and assistant turns.
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}

input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output() slices the prompt off with it

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990

# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Generated answer that parse_output() reports as the "no risk" label.
safe_token = "No"
# Generated answer that parse_output() reports as the "risk detected" label.
unsafe_token = "Yes"
# How many top-scoring tokens per generation step are kept (k for torch.topk in parse_output()).
nlogprobs = 20
def parse_output(output, input_len):
    """Derive the Yes/No label and the risk probability from a ``generate`` result.

    Args:
        output: Result of ``model.generate(..., return_dict_in_generate=True,
            output_scores=True)``; its ``scores`` and ``sequences`` are read.
        input_len: Number of prompt tokens to slice off ``output.sequences``.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``, ``safe_token``
        or ``"Failed"`` when the decoded text matches neither. ``prob_of_risk``
        is a float, or ``None`` when ``nlogprobs`` is not positive (previously
        that case crashed on ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k scores of every generation step except the last one, which is
        # presumably the end-of-sequence step -- TODO confirm.
        topk_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # get_probabilities() returns [safe, unsafe]; index 1 is the risk.
        prob_of_risk = get_probabilities(topk_per_step)[1].item()

    # Decode only the newly generated tokens of the first sequence.
    generated_ids = output.sequences[:, input_len:][0]
    res = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Collapse per-step top-k scores into a normalised [safe, unsafe] pair.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generation step;
            row 0 of each ``values`` / ``indices`` tensor is read.

    Returns:
        1-D tensor ``[p_safe, p_unsafe]`` that sums to 1.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()

    # Start from a tiny positive mass so math.log() below never sees zero.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50

    for step_topk in logprobs:
        step_scores = step_topk.values.tolist()[0]
        step_ids = step_topk.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            # Two independent checks on purpose: a token may count for both
            # sides if the two configured tokens happen to be equal.
            if token_text == safe_key:
                safe_token_prob += math.exp(score)
            if token_text == unsafe_key:
                unsafe_token_prob += math.exp(score)

    # softmax over the logs is the same as dividing each mass by their sum.
    log_masses = torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint identifier handed to both from_pretrained() calls below.
model_path = "ibm-granite/granite-guardian-3.1-8b"

# Weights are requested in bfloat16; device_map="auto" leaves placement to the library.
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto", torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""

# Conversation under review: the user prompt, then the assistant answer to be judged.
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]

# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}

input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output() slices the prompt off with it

model.eval()
with torch.no_grad():
    # Sampling is off; per-step scores are requested because parse_output() reads output.scores.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The answer gives the screening date as December 24, 1922, while the context says July 16, 1964.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."

# The retrieved passage goes in under the "context" role, followed by the answer to check.
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}

input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output() slices the prompt off with it

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call below passes "video_id", whereas the tool schema above declares "aweme_id".
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    }
]

# Tool definitions go in under the "tools" role, ahead of the user and assistant turns.
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}

input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
input_len = input_ids.shape[1]  # prompt length; parse_output() slices the prompt off with it

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990

# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Generated answer that parse_output() reports as the "no risk" label.
safe_token = "No"
# Generated answer that parse_output() reports as the "risk detected" label.
unsafe_token = "Yes"
# How many top-scoring tokens per generation step are kept (k for torch.topk in parse_output()).
nlogprobs = 20
def parse_output(output, input_len):
    """Derive the Yes/No label and the risk probability from a ``generate`` result.

    Args:
        output: Result of ``model.generate(..., return_dict_in_generate=True,
            output_scores=True)``; its ``scores`` and ``sequences`` are read.
        input_len: Number of prompt tokens to slice off ``output.sequences``.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``, ``safe_token``
        or ``"Failed"`` when the decoded text matches neither. ``prob_of_risk``
        is a float, or ``None`` when ``nlogprobs`` is not positive (previously
        that case crashed on ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k scores of every generation step except the last one, which is
        # presumably the end-of-sequence step -- TODO confirm.
        topk_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # get_probabilities() returns [safe, unsafe]; index 1 is the risk.
        prob_of_risk = get_probabilities(topk_per_step)[1].item()

    # Decode only the newly generated tokens of the first sequence.
    generated_ids = output.sequences[:, input_len:][0]
    res = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Reduce per-step top-k candidates to a ``[safe, unsafe]`` probability pair.

    Args:
        logprobs: Iterable of ``torch.topk`` results (objects exposing
            ``values`` and ``indices``), one per generation step. Only row 0
            of each is read -- presumably batch size 1; confirm with caller.

    Returns:
        A 2-element tensor: index 0 is the safe-token share, index 1 the
        unsafe-token share, normalised with softmax.
    """
    # Tiny non-zero floor keeps math.log() defined when a token never shows up.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50
    for gen_token_i in logprobs:
        step_scores = gen_token_i.values.tolist()[0]
        step_token_ids = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(step_scores, step_token_ids):
            # Normalise once; whitespace and case differences still match.
            decoded_token = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if decoded_token == safe_token.lower():
                safe_token_prob += math.exp(logprob)
            if decoded_token == unsafe_token.lower():
                unsafe_token_prob += math.exp(logprob)

    # softmax over the logs equals dividing each accumulated mass by their sum.
    return torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]), dim=0
    )
# Load the Granite Guardian checkpoint and tokenizer once; all usages below reuse them.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)


def _assess_risk(messages, guardian_config):
    """Run one guardian check and return ``(label, prob_of_risk)``.

    Renders *messages* through the chat template (``guardian_config`` carries
    the ``risk_name``), generates with sampling disabled and without gradient
    tracking, then hands the result to ``parse_output``.
    """
    input_ids = tokenizer.apply_chat_template(
        messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    model.eval()
    with torch.no_grad():
        output = model.generate(
            input_ids,
            do_sample=False,
            max_new_tokens=20,
            return_dict_in_generate=True,
            output_scores=True,
        )
    # The prompt length lets parse_output decode only the generated tokens.
    return parse_output(output, input_ids.shape[1])


# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call below passes "video_id" although the tool declares "aweme_id";
# the expected verdict noted after the prints is "Yes".
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict text that parse_output reports as "no risk" when the decoded generation matches it (case-insensitive).
safe_token = "No"
# Verdict text that parse_output reports as "risk detected" when the decoded generation matches it (case-insensitive).
unsafe_token = "Yes"
# k passed to torch.topk in parse_output: number of highest-scoring candidates inspected per generation step.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a ``(label, prob_of_risk)`` pair.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``; its
            ``sequences`` and ``scores`` fields are read.
        input_len: Number of prompt tokens, sliced off ``output.sequences``
            before decoding.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token`` or ``"Failed"`` when the decoded text matches neither.
        ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs <= 0`` and
        no probability was computed (previously this case crashed on
        ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        top_k_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in output.scores[:-1]
        ]
        # Index 1 of the [safe, unsafe] pair is the probability of risk.
        prob_of_risk = get_probabilities(top_k_per_step)[1].item()

    # Decode only the newly generated tokens of the first sequence.
    res = tokenizer.decode(output.sequences[:, input_len:][0], skip_special_tokens=True).strip()
    verdict = res.lower()
    if verdict == unsafe_token.lower():
        label = unsafe_token
    elif verdict == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Reduce per-step top-k candidates to a ``[safe, unsafe]`` probability pair.

    Args:
        logprobs: Iterable of ``torch.topk`` results (objects exposing
            ``values`` and ``indices``), one per generation step. Only row 0
            of each is read -- presumably batch size 1; confirm with caller.

    Returns:
        A 2-element tensor: index 0 is the safe-token share, index 1 the
        unsafe-token share, normalised with softmax.
    """
    # Tiny non-zero floor keeps math.log() defined when a token never shows up.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50
    for gen_token_i in logprobs:
        step_scores = gen_token_i.values.tolist()[0]
        step_token_ids = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(step_scores, step_token_ids):
            # Normalise once; whitespace and case differences still match.
            decoded_token = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if decoded_token == safe_token.lower():
                safe_token_prob += math.exp(logprob)
            if decoded_token == unsafe_token.lower():
                unsafe_token_prob += math.exp(logprob)

    # softmax over the logs equals dividing each accumulated mass by their sum.
    return torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]), dim=0
    )
# Load the Granite Guardian checkpoint and tokenizer once; all usages below reuse them.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)


def _assess_risk(messages, guardian_config):
    """Run one guardian check and return ``(label, prob_of_risk)``.

    Renders *messages* through the chat template (``guardian_config`` carries
    the ``risk_name``), generates with sampling disabled and without gradient
    tracking, then hands the result to ``parse_output``.
    """
    input_ids = tokenizer.apply_chat_template(
        messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    model.eval()
    with torch.no_grad():
        output = model.generate(
            input_ids,
            do_sample=False,
            max_new_tokens=20,
            return_dict_in_generate=True,
            output_scores=True,
        )
    # The prompt length lets parse_output decode only the generated tokens.
    return parse_output(output, input_ids.shape[1])


# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call below passes "video_id" although the tool declares "aweme_id";
# the expected verdict noted after the prints is "Yes".
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict text that parse_output reports as "no risk" when the decoded generation matches it (case-insensitive).
safe_token = "No"
# Verdict text that parse_output reports as "risk detected" when the decoded generation matches it (case-insensitive).
unsafe_token = "Yes"
# k passed to torch.topk in parse_output: number of highest-scoring candidates inspected per generation step.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a ``(label, prob_of_risk)`` pair.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``; its
            ``sequences`` and ``scores`` fields are read.
        input_len: Number of prompt tokens, sliced off ``output.sequences``
            before decoding.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token`` or ``"Failed"`` when the decoded text matches neither.
        ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs <= 0`` and
        no probability was computed (previously this case crashed on
        ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        top_k_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in output.scores[:-1]
        ]
        # Index 1 of the [safe, unsafe] pair is the probability of risk.
        prob_of_risk = get_probabilities(top_k_per_step)[1].item()

    # Decode only the newly generated tokens of the first sequence.
    res = tokenizer.decode(output.sequences[:, input_len:][0], skip_special_tokens=True).strip()
    verdict = res.lower()
    if verdict == unsafe_token.lower():
        label = unsafe_token
    elif verdict == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Reduce per-step top-k candidates to a ``[safe, unsafe]`` probability pair.

    Args:
        logprobs: Iterable of ``torch.topk`` results (objects exposing
            ``values`` and ``indices``), one per generation step. Only row 0
            of each is read -- presumably batch size 1; confirm with caller.

    Returns:
        A 2-element tensor: index 0 is the safe-token share, index 1 the
        unsafe-token share, normalised with softmax.
    """
    # Tiny non-zero floor keeps math.log() defined when a token never shows up.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50
    for gen_token_i in logprobs:
        step_scores = gen_token_i.values.tolist()[0]
        step_token_ids = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(step_scores, step_token_ids):
            # Normalise once; whitespace and case differences still match.
            decoded_token = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if decoded_token == safe_token.lower():
                safe_token_prob += math.exp(logprob)
            if decoded_token == unsafe_token.lower():
                unsafe_token_prob += math.exp(logprob)

    # softmax over the logs equals dividing each accumulated mass by their sum.
    return torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]), dim=0
    )
# Load the Granite Guardian checkpoint and tokenizer once; all usages below reuse them.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)


def _assess_risk(messages, guardian_config):
    """Run one guardian check and return ``(label, prob_of_risk)``.

    Renders *messages* through the chat template (``guardian_config`` carries
    the ``risk_name``), generates with sampling disabled and without gradient
    tracking, then hands the result to ``parse_output``.
    """
    input_ids = tokenizer.apply_chat_template(
        messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    model.eval()
    with torch.no_grad():
        output = model.generate(
            input_ids,
            do_sample=False,
            max_new_tokens=20,
            return_dict_in_generate=True,
            output_scores=True,
        )
    # The prompt length lets parse_output decode only the generated tokens.
    return parse_output(output, input_ids.shape[1])


# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call below passes "video_id" although the tool declares "aweme_id";
# the expected verdict noted after the prints is "Yes".
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict text that parse_output reports as "no risk" when the decoded generation matches it (case-insensitive).
safe_token = "No"
# Verdict text that parse_output reports as "risk detected" when the decoded generation matches it (case-insensitive).
unsafe_token = "Yes"
# k passed to torch.topk in parse_output: number of highest-scoring candidates inspected per generation step.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a ``(label, prob_of_risk)`` pair.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``; its
            ``sequences`` and ``scores`` fields are read.
        input_len: Number of prompt tokens, sliced off ``output.sequences``
            before decoding.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token`` or ``"Failed"`` when the decoded text matches neither.
        ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs <= 0`` and
        no probability was computed (previously this case crashed on
        ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        top_k_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in output.scores[:-1]
        ]
        # Index 1 of the [safe, unsafe] pair is the probability of risk.
        prob_of_risk = get_probabilities(top_k_per_step)[1].item()

    # Decode only the newly generated tokens of the first sequence.
    res = tokenizer.decode(output.sequences[:, input_len:][0], skip_special_tokens=True).strip()
    verdict = res.lower()
    if verdict == unsafe_token.lower():
        label = unsafe_token
    elif verdict == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Reduce per-step top-k candidates to a ``[safe, unsafe]`` probability pair.

    Args:
        logprobs: Iterable of ``torch.topk`` results (objects exposing
            ``values`` and ``indices``), one per generation step. Only row 0
            of each is read -- presumably batch size 1; confirm with caller.

    Returns:
        A 2-element tensor: index 0 is the safe-token share, index 1 the
        unsafe-token share, normalised with softmax.
    """
    # Tiny non-zero floor keeps math.log() defined when a token never shows up.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50
    for gen_token_i in logprobs:
        step_scores = gen_token_i.values.tolist()[0]
        step_token_ids = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(step_scores, step_token_ids):
            # Normalise once; whitespace and case differences still match.
            decoded_token = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if decoded_token == safe_token.lower():
                safe_token_prob += math.exp(logprob)
            if decoded_token == unsafe_token.lower():
                unsafe_token_prob += math.exp(logprob)

    # softmax over the logs equals dividing each accumulated mass by their sum.
    return torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]), dim=0
    )
# Load the Granite Guardian checkpoint and tokenizer once; all usages below reuse them.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)


def _assess_risk(messages, guardian_config):
    """Run one guardian check and return ``(label, prob_of_risk)``.

    Renders *messages* through the chat template (``guardian_config`` carries
    the ``risk_name``), generates with sampling disabled and without gradient
    tracking, then hands the result to ``parse_output``.
    """
    input_ids = tokenizer.apply_chat_template(
        messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    model.eval()
    with torch.no_grad():
        output = model.generate(
            input_ids,
            do_sample=False,
            max_new_tokens=20,
            return_dict_in_generate=True,
            output_scores=True,
        )
    # The prompt length lets parse_output decode only the generated tokens.
    return parse_output(output, input_ids.shape[1])


# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call below passes "video_id" although the tool declares "aweme_id";
# the expected verdict noted after the prints is "Yes".
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict text that parse_output reports as "no risk" when the decoded generation matches it (case-insensitive).
safe_token = "No"
# Verdict text that parse_output reports as "risk detected" when the decoded generation matches it (case-insensitive).
unsafe_token = "Yes"
# k passed to torch.topk in parse_output: number of highest-scoring candidates inspected per generation step.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a ``(label, prob_of_risk)`` pair.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``; its
            ``sequences`` and ``scores`` fields are read.
        input_len: Number of prompt tokens, sliced off ``output.sequences``
            before decoding.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token`` or ``"Failed"`` when the decoded text matches neither.
        ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs <= 0`` and
        no probability was computed (previously this case crashed on
        ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        top_k_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in output.scores[:-1]
        ]
        # Index 1 of the [safe, unsafe] pair is the probability of risk.
        prob_of_risk = get_probabilities(top_k_per_step)[1].item()

    # Decode only the newly generated tokens of the first sequence.
    res = tokenizer.decode(output.sequences[:, input_len:][0], skip_special_tokens=True).strip()
    verdict = res.lower()
    if verdict == unsafe_token.lower():
        label = unsafe_token
    elif verdict == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Reduce per-step top-k candidates to a ``[safe, unsafe]`` probability pair.

    Args:
        logprobs: Iterable of ``torch.topk`` results (objects exposing
            ``values`` and ``indices``), one per generation step. Only row 0
            of each is read -- presumably batch size 1; confirm with caller.

    Returns:
        A 2-element tensor: index 0 is the safe-token share, index 1 the
        unsafe-token share, normalised with softmax.
    """
    # Tiny non-zero floor keeps math.log() defined when a token never shows up.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50
    for gen_token_i in logprobs:
        step_scores = gen_token_i.values.tolist()[0]
        step_token_ids = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(step_scores, step_token_ids):
            # Normalise once; whitespace and case differences still match.
            decoded_token = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if decoded_token == safe_token.lower():
                safe_token_prob += math.exp(logprob)
            if decoded_token == unsafe_token.lower():
                unsafe_token_prob += math.exp(logprob)

    # softmax over the logs equals dividing each accumulated mass by their sum.
    return torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]), dim=0
    )
# Load the Granite Guardian checkpoint and tokenizer once; all usages below reuse them.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)


def _assess_risk(messages, guardian_config):
    """Run one guardian check and return ``(label, prob_of_risk)``.

    Renders *messages* through the chat template (``guardian_config`` carries
    the ``risk_name``), generates with sampling disabled and without gradient
    tracking, then hands the result to ``parse_output``.
    """
    input_ids = tokenizer.apply_chat_template(
        messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    model.eval()
    with torch.no_grad():
        output = model.generate(
            input_ids,
            do_sample=False,
            max_new_tokens=20,
            return_dict_in_generate=True,
            output_scores=True,
        )
    # The prompt length lets parse_output decode only the generated tokens.
    return parse_output(output, input_ids.shape[1])


# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call below passes "video_id" although the tool declares "aweme_id";
# the expected verdict noted after the prints is "Yes".
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict text that parse_output reports as "no risk" when the decoded generation matches it (case-insensitive).
safe_token = "No"
# Verdict text that parse_output reports as "risk detected" when the decoded generation matches it (case-insensitive).
unsafe_token = "Yes"
# k passed to torch.topk in parse_output: number of highest-scoring candidates inspected per generation step.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a ``(label, prob_of_risk)`` pair.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``; its
            ``sequences`` and ``scores`` fields are read.
        input_len: Number of prompt tokens, sliced off ``output.sequences``
            before decoding.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token`` or ``"Failed"`` when the decoded text matches neither.
        ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs <= 0`` and
        no probability was computed (previously this case crashed on
        ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        top_k_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in output.scores[:-1]
        ]
        # Index 1 of the [safe, unsafe] pair is the probability of risk.
        prob_of_risk = get_probabilities(top_k_per_step)[1].item()

    # Decode only the newly generated tokens of the first sequence.
    res = tokenizer.decode(output.sequences[:, input_len:][0], skip_special_tokens=True).strip()
    verdict = res.lower()
    if verdict == unsafe_token.lower():
        label = unsafe_token
    elif verdict == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate the safe/unsafe probability split from top-k candidates.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generated
            position, each exposing ``values`` and ``indices``. Only the
            first row (``[0]``) of each is read.

    Returns:
        A 2-element tensor ``[p_safe, p_unsafe]`` produced by a softmax,
        so the two entries sum to 1.
    """
    safe_target = safe_token.lower()
    unsafe_target = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a token never shows up.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50
    for gen_token_i in logprobs:
        scores = gen_token_i.values.tolist()[0]
        token_ids = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(scores, token_ids):
            candidate = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if candidate == safe_target:
                safe_token_prob += math.exp(logprob)
            if candidate == unsafe_target:
                unsafe_token_prob += math.exp(logprob)

    return torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]),
        dim=0,
    )
model_path = "ibm-granite/granite-guardian-3.1-8b"
# Load the checkpoint in bfloat16; device_map="auto" leaves placement to the library.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length; parse_output uses it to slice off the generated answer.
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    # Sampling is off; scores are requested because parse_output reads output.scores.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Answer text that parse_output maps to the "no risk" label.
safe_token = "No"
# Answer text that parse_output maps to the "risk detected" label.
unsafe_token = "Yes"
# k passed to torch.topk for each generated position; parse_output only
# computes a probability when this is greater than 0.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a risk label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens. Everything after this offset in
            ``output.sequences`` is decoded as the model's answer.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token`` or ``"Failed"`` when the answer matches neither.
        ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs`` is not
        positive and therefore no probability was computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generated position except the last one.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 is the unsafe entry of the [safe, unsafe] pair.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip().lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    # Previously `.item()` was called unconditionally and crashed on None
    # whenever nlogprobs <= 0.
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate the safe/unsafe probability split from top-k candidates.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generated
            position, each exposing ``values`` and ``indices``. Only the
            first row (``[0]``) of each is read.

    Returns:
        A 2-element tensor ``[p_safe, p_unsafe]`` produced by a softmax,
        so the two entries sum to 1.
    """
    safe_target = safe_token.lower()
    unsafe_target = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a token never shows up.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50
    for gen_token_i in logprobs:
        scores = gen_token_i.values.tolist()[0]
        token_ids = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(scores, token_ids):
            candidate = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if candidate == safe_target:
                safe_token_prob += math.exp(logprob)
            if candidate == unsafe_target:
                unsafe_token_prob += math.exp(logprob)

    return torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]),
        dim=0,
    )
model_path = "ibm-granite/granite-guardian-3.1-8b"
# Load the checkpoint in bfloat16; device_map="auto" leaves placement to the library.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length; parse_output uses it to slice off the generated answer.
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    # Sampling is off; scores are requested because parse_output reads output.scores.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Answer text that parse_output maps to the "no risk" label.
safe_token = "No"
# Answer text that parse_output maps to the "risk detected" label.
unsafe_token = "Yes"
# k passed to torch.topk for each generated position; parse_output only
# computes a probability when this is greater than 0.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a risk label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens. Everything after this offset in
            ``output.sequences`` is decoded as the model's answer.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token`` or ``"Failed"`` when the answer matches neither.
        ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs`` is not
        positive and therefore no probability was computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generated position except the last one.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 is the unsafe entry of the [safe, unsafe] pair.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip().lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    # Previously `.item()` was called unconditionally and crashed on None
    # whenever nlogprobs <= 0.
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate the safe/unsafe probability split from top-k candidates.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generated
            position, each exposing ``values`` and ``indices``. Only the
            first row (``[0]``) of each is read.

    Returns:
        A 2-element tensor ``[p_safe, p_unsafe]`` produced by a softmax,
        so the two entries sum to 1.
    """
    safe_target = safe_token.lower()
    unsafe_target = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a token never shows up.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50
    for gen_token_i in logprobs:
        scores = gen_token_i.values.tolist()[0]
        token_ids = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(scores, token_ids):
            candidate = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if candidate == safe_target:
                safe_token_prob += math.exp(logprob)
            if candidate == unsafe_target:
                unsafe_token_prob += math.exp(logprob)

    return torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]),
        dim=0,
    )
model_path = "ibm-granite/granite-guardian-3.1-8b"
# Load the checkpoint in bfloat16; device_map="auto" leaves placement to the library.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length; parse_output uses it to slice off the generated answer.
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    # Sampling is off; scores are requested because parse_output reads output.scores.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Answer text that parse_output maps to the "no risk" label.
safe_token = "No"
# Answer text that parse_output maps to the "risk detected" label.
unsafe_token = "Yes"
# k passed to torch.topk for each generated position; parse_output only
# computes a probability when this is greater than 0.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a risk label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens. Everything after this offset in
            ``output.sequences`` is decoded as the model's answer.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token`` or ``"Failed"`` when the answer matches neither.
        ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs`` is not
        positive and therefore no probability was computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generated position except the last one.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 is the unsafe entry of the [safe, unsafe] pair.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip().lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    # Previously `.item()` was called unconditionally and crashed on None
    # whenever nlogprobs <= 0.
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate the safe/unsafe probability split from top-k candidates.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generated
            position, each exposing ``values`` and ``indices``. Only the
            first row (``[0]``) of each is read.

    Returns:
        A 2-element tensor ``[p_safe, p_unsafe]`` produced by a softmax,
        so the two entries sum to 1.
    """
    safe_target = safe_token.lower()
    unsafe_target = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a token never shows up.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50
    for gen_token_i in logprobs:
        scores = gen_token_i.values.tolist()[0]
        token_ids = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(scores, token_ids):
            candidate = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if candidate == safe_target:
                safe_token_prob += math.exp(logprob)
            if candidate == unsafe_target:
                unsafe_token_prob += math.exp(logprob)

    return torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]),
        dim=0,
    )
model_path = "ibm-granite/granite-guardian-3.1-8b"
# Load the checkpoint in bfloat16; device_map="auto" leaves placement to the library.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length; parse_output uses it to slice off the generated answer.
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    # Sampling is off; scores are requested because parse_output reads output.scores.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Answer text that parse_output maps to the "no risk" label.
safe_token = "No"
# Answer text that parse_output maps to the "risk detected" label.
unsafe_token = "Yes"
# k passed to torch.topk for each generated position; parse_output only
# computes a probability when this is greater than 0.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a risk label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens. Everything after this offset in
            ``output.sequences`` is decoded as the model's answer.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token`` or ``"Failed"`` when the answer matches neither.
        ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs`` is not
        positive and therefore no probability was computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generated position except the last one.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 is the unsafe entry of the [safe, unsafe] pair.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip().lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    # Previously `.item()` was called unconditionally and crashed on None
    # whenever nlogprobs <= 0.
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate the safe/unsafe probability split from top-k candidates.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generated
            position, each exposing ``values`` and ``indices``. Only the
            first row (``[0]``) of each is read.

    Returns:
        A 2-element tensor ``[p_safe, p_unsafe]`` produced by a softmax,
        so the two entries sum to 1.
    """
    safe_target = safe_token.lower()
    unsafe_target = unsafe_token.lower()
    # Tiny floor keeps math.log defined when a token never shows up.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50
    for gen_token_i in logprobs:
        scores = gen_token_i.values.tolist()[0]
        token_ids = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(scores, token_ids):
            candidate = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if candidate == safe_target:
                safe_token_prob += math.exp(logprob)
            if candidate == unsafe_target:
                unsafe_token_prob += math.exp(logprob)

    return torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]),
        dim=0,
    )
model_path = "ibm-granite/granite-guardian-3.1-8b"
# Load the checkpoint in bfloat16; device_map="auto" leaves placement to the library.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length; parse_output uses it to slice off the generated answer.
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    # Sampling is off; scores are requested because parse_output reads output.scores.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Answer text that parse_output maps to the "no risk" label.
safe_token = "No"
# Answer text that parse_output maps to the "risk detected" label.
unsafe_token = "Yes"
# k passed to torch.topk for each generated position; parse_output only
# computes a probability when this is greater than 0.
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``generate`` result into a risk label and a risk probability.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens. Everything after this offset in
            ``output.sequences`` is decoded as the model's answer.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token`` or ``"Failed"`` when the answer matches neither.
        ``prob_of_risk`` is a float, or ``None`` when ``nlogprobs`` is not
        positive and therefore no probability was computed.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-k candidates for every generated position except the last one.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 is the unsafe entry of the [safe, unsafe] pair.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip().lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    # Previously `.item()` was called unconditionally and crashed on None
    # whenever nlogprobs <= 0.
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Aggregate top-k results into a two-element [safe, unsafe] distribution.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generation
            step; row 0 of each result's ``values`` and ``indices`` is read.

    Returns:
        A 1-D tensor ``[p_safe, p_unsafe]`` obtained by a softmax over the
        logs of the accumulated masses of the safe and unsafe tokens.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floors keep math.log() defined when a token never shows up.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_indices = step.indices.tolist()[0]
        for value, token_id in zip(step_values, step_indices):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_key:
                safe_mass += math.exp(value)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(value)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
model_path,
device_map="auto",
torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
{
"name": "comment_list",
"description": "Fetches a list of comments for a specified IBM video using the given API.",
"parameters": {
"aweme_id": {
"description": "The ID of the IBM video.",
"type": "int",
"default": "7178094165614464282"
},
"cursor": {
"description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
"type": "int, optional",
"default": "0"
},
"count": {
"description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
"type": "int, optional",
"default": "20"
}
}
}
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
response_text = [
{
"name": "comment_list",
"arguments": {
"video_id": 456789123,
"count": 15
}
}
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
safe_token = "No"
unsafe_token = "Yes"
nlogprobs = 20
def parse_output(output, input_len):
    """Extract the risk label and the probability of risk from a generation.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens, used to slice the generated
            continuation off ``output.sequences``.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches
        neither. ``prob_of_risk`` is a Python float, or ``None`` when
        ``nlogprobs <= 0`` and no probability was computed (the original
        code raised ``AttributeError`` on ``None.item()`` in that case).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The final score entry is skipped, as in the original example
        # (presumably the end-of-sequence step -- TODO confirm).
        top_logprobs = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        prob_of_risk = get_probabilities(top_logprobs)[1].item()

    generated = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    answer = generated.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Aggregate top-k results into a two-element [safe, unsafe] distribution.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generation
            step; row 0 of each result's ``values`` and ``indices`` is read.

    Returns:
        A 1-D tensor ``[p_safe, p_unsafe]`` obtained by a softmax over the
        logs of the accumulated masses of the safe and unsafe tokens.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floors keep math.log() defined when a token never shows up.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_indices = step.indices.tolist()[0]
        for value, token_id in zip(step_values, step_indices):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_key:
                safe_mass += math.exp(value)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(value)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
model_path,
device_map="auto",
torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
{
"name": "comment_list",
"description": "Fetches a list of comments for a specified IBM video using the given API.",
"parameters": {
"aweme_id": {
"description": "The ID of the IBM video.",
"type": "int",
"default": "7178094165614464282"
},
"cursor": {
"description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
"type": "int, optional",
"default": "0"
},
"count": {
"description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
"type": "int, optional",
"default": "20"
}
}
}
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
response_text = [
{
"name": "comment_list",
"arguments": {
"video_id": 456789123,
"count": 15
}
}
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
safe_token = "No"
unsafe_token = "Yes"
nlogprobs = 20
def parse_output(output, input_len):
    """Extract the risk label and the probability of risk from a generation.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens, used to slice the generated
            continuation off ``output.sequences``.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches
        neither. ``prob_of_risk`` is a Python float, or ``None`` when
        ``nlogprobs <= 0`` and no probability was computed (the original
        code raised ``AttributeError`` on ``None.item()`` in that case).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The final score entry is skipped, as in the original example
        # (presumably the end-of-sequence step -- TODO confirm).
        top_logprobs = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        prob_of_risk = get_probabilities(top_logprobs)[1].item()

    generated = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    answer = generated.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Aggregate top-k results into a two-element [safe, unsafe] distribution.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generation
            step; row 0 of each result's ``values`` and ``indices`` is read.

    Returns:
        A 1-D tensor ``[p_safe, p_unsafe]`` obtained by a softmax over the
        logs of the accumulated masses of the safe and unsafe tokens.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floors keep math.log() defined when a token never shows up.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_indices = step.indices.tolist()[0]
        for value, token_id in zip(step_values, step_indices):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_key:
                safe_mass += math.exp(value)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(value)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
model_path,
device_map="auto",
torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
{
"name": "comment_list",
"description": "Fetches a list of comments for a specified IBM video using the given API.",
"parameters": {
"aweme_id": {
"description": "The ID of the IBM video.",
"type": "int",
"default": "7178094165614464282"
},
"cursor": {
"description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
"type": "int, optional",
"default": "0"
},
"count": {
"description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
"type": "int, optional",
"default": "20"
}
}
}
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
response_text = [
{
"name": "comment_list",
"arguments": {
"video_id": 456789123,
"count": 15
}
}
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
safe_token = "No"
unsafe_token = "Yes"
nlogprobs = 20
def parse_output(output, input_len):
    """Extract the risk label and the probability of risk from a generation.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens, used to slice the generated
            continuation off ``output.sequences``.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches
        neither. ``prob_of_risk`` is a Python float, or ``None`` when
        ``nlogprobs <= 0`` and no probability was computed (the original
        code raised ``AttributeError`` on ``None.item()`` in that case).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The final score entry is skipped, as in the original example
        # (presumably the end-of-sequence step -- TODO confirm).
        top_logprobs = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        prob_of_risk = get_probabilities(top_logprobs)[1].item()

    generated = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    answer = generated.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Aggregate top-k results into a two-element [safe, unsafe] distribution.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generation
            step; row 0 of each result's ``values`` and ``indices`` is read.

    Returns:
        A 1-D tensor ``[p_safe, p_unsafe]`` obtained by a softmax over the
        logs of the accumulated masses of the safe and unsafe tokens.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floors keep math.log() defined when a token never shows up.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_indices = step.indices.tolist()[0]
        for value, token_id in zip(step_values, step_indices):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_key:
                safe_mass += math.exp(value)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(value)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
model_path,
device_map="auto",
torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
{
"name": "comment_list",
"description": "Fetches a list of comments for a specified IBM video using the given API.",
"parameters": {
"aweme_id": {
"description": "The ID of the IBM video.",
"type": "int",
"default": "7178094165614464282"
},
"cursor": {
"description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
"type": "int, optional",
"default": "0"
},
"count": {
"description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
"type": "int, optional",
"default": "20"
}
}
}
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
response_text = [
{
"name": "comment_list",
"arguments": {
"video_id": 456789123,
"count": 15
}
}
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
safe_token = "No"
unsafe_token = "Yes"
nlogprobs = 20
def parse_output(output, input_len):
    """Extract the risk label and the probability of risk from a generation.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens, used to slice the generated
            continuation off ``output.sequences``.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches
        neither. ``prob_of_risk`` is a Python float, or ``None`` when
        ``nlogprobs <= 0`` and no probability was computed (the original
        code raised ``AttributeError`` on ``None.item()`` in that case).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The final score entry is skipped, as in the original example
        # (presumably the end-of-sequence step -- TODO confirm).
        top_logprobs = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        prob_of_risk = get_probabilities(top_logprobs)[1].item()

    generated = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    answer = generated.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Aggregate top-k results into a two-element [safe, unsafe] distribution.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generation
            step; row 0 of each result's ``values`` and ``indices`` is read.

    Returns:
        A 1-D tensor ``[p_safe, p_unsafe]`` obtained by a softmax over the
        logs of the accumulated masses of the safe and unsafe tokens.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floors keep math.log() defined when a token never shows up.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        step_values = step.values.tolist()[0]
        step_indices = step.indices.tolist()[0]
        for value, token_id in zip(step_values, step_indices):
            token_text = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if token_text == safe_key:
                safe_mass += math.exp(value)
            if token_text == unsafe_key:
                unsafe_mass += math.exp(value)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
model_path,
device_map="auto",
torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
{
"name": "comment_list",
"description": "Fetches a list of comments for a specified IBM video using the given API.",
"parameters": {
"aweme_id": {
"description": "The ID of the IBM video.",
"type": "int",
"default": "7178094165614464282"
},
"cursor": {
"description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
"type": "int, optional",
"default": "0"
},
"count": {
"description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
"type": "int, optional",
"default": "20"
}
}
}
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
response_text = [
{
"name": "comment_list",
"arguments": {
"video_id": 456789123,
"count": 15
}
}
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
safe_token = "No"
unsafe_token = "Yes"
nlogprobs = 20
def parse_output(output, input_len):
    """Extract the risk label and the probability of risk from a generation.

    Args:
        output: Result of ``model.generate`` called with
            ``return_dict_in_generate=True`` and ``output_scores=True``;
            its ``scores`` and ``sequences`` attributes are read.
        input_len: Number of prompt tokens, used to slice the generated
            continuation off ``output.sequences``.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches
        neither. ``prob_of_risk`` is a Python float, or ``None`` when
        ``nlogprobs <= 0`` and no probability was computed (the original
        code raised ``AttributeError`` on ``None.item()`` in that case).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The final score entry is skipped, as in the original example
        # (presumably the end-of-sequence step -- TODO confirm).
        top_logprobs = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        prob_of_risk = get_probabilities(top_logprobs)[1].item()

    generated = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip()
    answer = generated.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate the safe/unsafe split from per-step top-k candidates.

    ``logprobs`` is an iterable of ``torch.topk``-style results (objects with
    ``values`` and ``indices``); only row 0 of each is read. Every candidate
    whose token text, stripped and lower-cased, equals ``safe_token`` or
    ``unsafe_token`` adds ``exp(score)`` to that token's running mass.

    Returns:
        A 2-element tensor ``[safe, unsafe]``: the softmax over the logs of
        the two accumulated masses.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor keeps math.log finite when a token never appears.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        scores = step.values.tolist()[0]
        token_ids = step.indices.tolist()[0]
        for score, token_id in zip(scores, token_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_key:
                safe_mass += math.exp(score)
            if candidate == unsafe_key:
                unsafe_mass += math.exp(score)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Inference only: switch to eval mode once rather than before every usage.
model.eval()


def _assess_risk(messages, guardian_config):
    """Run one guardian check and return ``parse_output``'s ``(label, prob)``.

    The messages are rendered with the tokenizer's chat template (passing
    ``guardian_config`` through), moved to the model's device, and decoded
    greedily for at most 20 new tokens with per-step scores retained.
    """
    input_ids = tokenizer.apply_chat_template(
        messages,
        guardian_config=guardian_config,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)
    with torch.no_grad():
        output = model.generate(
            input_ids,
            do_sample=False,
            max_new_tokens=20,
            return_dict_in_generate=True,
            output_scores=True,
        )
    # The prompt length tells parse_output where the generated answer starts.
    return parse_output(output, input_ids.shape[1])


# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call below uses "video_id" although the tool declares "aweme_id"; the
# expected output for this example is "Yes" (risk detected).
response_text = [
    {
        "name": "comment_list",
        "arguments": {"video_id": 456789123, "count": 15},
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990

# --- Quickstart Example (python, transformers) ---
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Answer strings that parse_output / get_probabilities compare against
# (case-insensitively) to recognise the model's verdict.
safe_token, unsafe_token = "No", "Yes"
# How many top-scoring candidates per generated step are passed to torch.topk.
nlogprobs = 20
def parse_output(output, input_len):
    """Convert a ``generate`` result into a ``(label, prob_of_risk)`` pair.

    Args:
        output: Generation result exposing ``scores`` (one score tensor per
            generated step) and ``sequences`` (prompt plus generated ids).
        input_len: Number of prompt tokens; everything after this position in
            ``output.sequences`` is decoded as the model's answer.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token`` or
        ``safe_token`` when the decoded answer matches one of them
        case-insensitively, otherwise ``"Failed"``. ``prob_of_risk`` is the
        unsafe entry of ``get_probabilities`` as a Python float, or ``None``
        when ``nlogprobs`` is not positive and no scores are inspected
        (previously that case crashed on ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top candidates for every generated step except the last one.
        # NOTE(review): the skipped final step is presumably the
        # end-of-sequence token -- confirm.
        # k is clamped so a vocabulary smaller than nlogprobs cannot make
        # torch.topk raise.
        top_candidates = [
            torch.topk(
                step_scores,
                k=min(nlogprobs, step_scores.shape[-1]),
                largest=True,
                sorted=True,
            )
            for step_scores in list(output.scores)[:-1]
        ]
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip().lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate the safe/unsafe split from per-step top-k candidates.

    ``logprobs`` is an iterable of ``torch.topk``-style results (objects with
    ``values`` and ``indices``); only row 0 of each is read. Every candidate
    whose token text, stripped and lower-cased, equals ``safe_token`` or
    ``unsafe_token`` adds ``exp(score)`` to that token's running mass.

    Returns:
        A 2-element tensor ``[safe, unsafe]``: the softmax over the logs of
        the two accumulated masses.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor keeps math.log finite when a token never appears.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        scores = step.values.tolist()[0]
        token_ids = step.indices.tolist()[0]
        for score, token_id in zip(scores, token_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_key:
                safe_mass += math.exp(score)
            if candidate == unsafe_key:
                unsafe_mass += math.exp(score)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Inference only: switch to eval mode once rather than before every usage.
model.eval()


def _assess_risk(messages, guardian_config):
    """Run one guardian check and return ``parse_output``'s ``(label, prob)``.

    The messages are rendered with the tokenizer's chat template (passing
    ``guardian_config`` through), moved to the model's device, and decoded
    greedily for at most 20 new tokens with per-step scores retained.
    """
    input_ids = tokenizer.apply_chat_template(
        messages,
        guardian_config=guardian_config,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)
    with torch.no_grad():
        output = model.generate(
            input_ids,
            do_sample=False,
            max_new_tokens=20,
            return_dict_in_generate=True,
            output_scores=True,
        )
    # The prompt length tells parse_output where the generated answer starts.
    return parse_output(output, input_ids.shape[1])


# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call below uses "video_id" although the tool declares "aweme_id"; the
# expected output for this example is "Yes" (risk detected).
response_text = [
    {
        "name": "comment_list",
        "arguments": {"video_id": 456789123, "count": 15},
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990

# --- Quickstart Example (python, transformers) ---
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Answer strings that parse_output / get_probabilities compare against
# (case-insensitively) to recognise the model's verdict.
safe_token, unsafe_token = "No", "Yes"
# How many top-scoring candidates per generated step are passed to torch.topk.
nlogprobs = 20
def parse_output(output, input_len):
    """Convert a ``generate`` result into a ``(label, prob_of_risk)`` pair.

    Args:
        output: Generation result exposing ``scores`` (one score tensor per
            generated step) and ``sequences`` (prompt plus generated ids).
        input_len: Number of prompt tokens; everything after this position in
            ``output.sequences`` is decoded as the model's answer.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token`` or
        ``safe_token`` when the decoded answer matches one of them
        case-insensitively, otherwise ``"Failed"``. ``prob_of_risk`` is the
        unsafe entry of ``get_probabilities`` as a Python float, or ``None``
        when ``nlogprobs`` is not positive and no scores are inspected
        (previously that case crashed on ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top candidates for every generated step except the last one.
        # NOTE(review): the skipped final step is presumably the
        # end-of-sequence token -- confirm.
        # k is clamped so a vocabulary smaller than nlogprobs cannot make
        # torch.topk raise.
        top_candidates = [
            torch.topk(
                step_scores,
                k=min(nlogprobs, step_scores.shape[-1]),
                largest=True,
                sorted=True,
            )
            for step_scores in list(output.scores)[:-1]
        ]
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip().lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate the safe/unsafe split from per-step top-k candidates.

    ``logprobs`` is an iterable of ``torch.topk``-style results (objects with
    ``values`` and ``indices``); only row 0 of each is read. Every candidate
    whose token text, stripped and lower-cased, equals ``safe_token`` or
    ``unsafe_token`` adds ``exp(score)`` to that token's running mass.

    Returns:
        A 2-element tensor ``[safe, unsafe]``: the softmax over the logs of
        the two accumulated masses.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor keeps math.log finite when a token never appears.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        scores = step.values.tolist()[0]
        token_ids = step.indices.tolist()[0]
        for score, token_id in zip(scores, token_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_key:
                safe_mass += math.exp(score)
            if candidate == unsafe_key:
                unsafe_mass += math.exp(score)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Inference only: switch to eval mode once rather than before every usage.
model.eval()


def _assess_risk(messages, guardian_config):
    """Run one guardian check and return ``parse_output``'s ``(label, prob)``.

    The messages are rendered with the tokenizer's chat template (passing
    ``guardian_config`` through), moved to the model's device, and decoded
    greedily for at most 20 new tokens with per-step scores retained.
    """
    input_ids = tokenizer.apply_chat_template(
        messages,
        guardian_config=guardian_config,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)
    with torch.no_grad():
        output = model.generate(
            input_ids,
            do_sample=False,
            max_new_tokens=20,
            return_dict_in_generate=True,
            output_scores=True,
        )
    # The prompt length tells parse_output where the generated answer starts.
    return parse_output(output, input_ids.shape[1])


# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call below uses "video_id" although the tool declares "aweme_id"; the
# expected output for this example is "Yes" (risk detected).
response_text = [
    {
        "name": "comment_list",
        "arguments": {"video_id": 456789123, "count": 15},
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990

# --- Quickstart Example (python, transformers) ---
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Answer strings that parse_output / get_probabilities compare against
# (case-insensitively) to recognise the model's verdict.
safe_token, unsafe_token = "No", "Yes"
# How many top-scoring candidates per generated step are passed to torch.topk.
nlogprobs = 20
def parse_output(output, input_len):
    """Convert a ``generate`` result into a ``(label, prob_of_risk)`` pair.

    Args:
        output: Generation result exposing ``scores`` (one score tensor per
            generated step) and ``sequences`` (prompt plus generated ids).
        input_len: Number of prompt tokens; everything after this position in
            ``output.sequences`` is decoded as the model's answer.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token`` or
        ``safe_token`` when the decoded answer matches one of them
        case-insensitively, otherwise ``"Failed"``. ``prob_of_risk`` is the
        unsafe entry of ``get_probabilities`` as a Python float, or ``None``
        when ``nlogprobs`` is not positive and no scores are inspected
        (previously that case crashed on ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top candidates for every generated step except the last one.
        # NOTE(review): the skipped final step is presumably the
        # end-of-sequence token -- confirm.
        # k is clamped so a vocabulary smaller than nlogprobs cannot make
        # torch.topk raise.
        top_candidates = [
            torch.topk(
                step_scores,
                k=min(nlogprobs, step_scores.shape[-1]),
                largest=True,
                sorted=True,
            )
            for step_scores in list(output.scores)[:-1]
        ]
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip().lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate the safe/unsafe split from per-step top-k candidates.

    ``logprobs`` is an iterable of ``torch.topk``-style results (objects with
    ``values`` and ``indices``); only row 0 of each is read. Every candidate
    whose token text, stripped and lower-cased, equals ``safe_token`` or
    ``unsafe_token`` adds ``exp(score)`` to that token's running mass.

    Returns:
        A 2-element tensor ``[safe, unsafe]``: the softmax over the logs of
        the two accumulated masses.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor keeps math.log finite when a token never appears.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        scores = step.values.tolist()[0]
        token_ids = step.indices.tolist()[0]
        for score, token_id in zip(scores, token_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_key:
                safe_mass += math.exp(score)
            if candidate == unsafe_key:
                unsafe_mass += math.exp(score)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Inference only: switch to eval mode once rather than before every usage.
model.eval()


def _assess_risk(messages, guardian_config):
    """Run one guardian check and return ``parse_output``'s ``(label, prob)``.

    The messages are rendered with the tokenizer's chat template (passing
    ``guardian_config`` through), moved to the model's device, and decoded
    greedily for at most 20 new tokens with per-step scores retained.
    """
    input_ids = tokenizer.apply_chat_template(
        messages,
        guardian_config=guardian_config,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)
    with torch.no_grad():
        output = model.generate(
            input_ids,
            do_sample=False,
            max_new_tokens=20,
            return_dict_in_generate=True,
            output_scores=True,
        )
    # The prompt length tells parse_output where the generated answer starts.
    return parse_output(output, input_ids.shape[1])


# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call below uses "video_id" although the tool declares "aweme_id"; the
# expected output for this example is "Yes" (risk detected).
response_text = [
    {
        "name": "comment_list",
        "arguments": {"video_id": 456789123, "count": 15},
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990

# --- Quickstart Example (python, transformers) ---
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Answer strings that parse_output / get_probabilities compare against
# (case-insensitively) to recognise the model's verdict.
safe_token, unsafe_token = "No", "Yes"
# How many top-scoring candidates per generated step are passed to torch.topk.
nlogprobs = 20
def parse_output(output, input_len):
    """Convert a ``generate`` result into a ``(label, prob_of_risk)`` pair.

    Args:
        output: Generation result exposing ``scores`` (one score tensor per
            generated step) and ``sequences`` (prompt plus generated ids).
        input_len: Number of prompt tokens; everything after this position in
            ``output.sequences`` is decoded as the model's answer.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token`` or
        ``safe_token`` when the decoded answer matches one of them
        case-insensitively, otherwise ``"Failed"``. ``prob_of_risk`` is the
        unsafe entry of ``get_probabilities`` as a Python float, or ``None``
        when ``nlogprobs`` is not positive and no scores are inspected
        (previously that case crashed on ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top candidates for every generated step except the last one.
        # NOTE(review): the skipped final step is presumably the
        # end-of-sequence token -- confirm.
        # k is clamped so a vocabulary smaller than nlogprobs cannot make
        # torch.topk raise.
        top_candidates = [
            torch.topk(
                step_scores,
                k=min(nlogprobs, step_scores.shape[-1]),
                largest=True,
                sorted=True,
            )
            for step_scores in list(output.scores)[:-1]
        ]
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip().lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate the safe/unsafe split from per-step top-k candidates.

    ``logprobs`` is an iterable of ``torch.topk``-style results (objects with
    ``values`` and ``indices``); only row 0 of each is read. Every candidate
    whose token text, stripped and lower-cased, equals ``safe_token`` or
    ``unsafe_token`` adds ``exp(score)`` to that token's running mass.

    Returns:
        A 2-element tensor ``[safe, unsafe]``: the softmax over the logs of
        the two accumulated masses.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny floor keeps math.log finite when a token never appears.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        scores = step.values.tolist()[0]
        token_ids = step.indices.tolist()[0]
        for score, token_id in zip(scores, token_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_key:
                safe_mass += math.exp(score)
            if candidate == unsafe_key:
                unsafe_mass += math.exp(score)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Inference only: switch to eval mode once rather than before every usage.
model.eval()


def _assess_risk(messages, guardian_config):
    """Run one guardian check and return ``parse_output``'s ``(label, prob)``.

    The messages are rendered with the tokenizer's chat template (passing
    ``guardian_config`` through), moved to the model's device, and decoded
    greedily for at most 20 new tokens with per-step scores retained.
    """
    input_ids = tokenizer.apply_chat_template(
        messages,
        guardian_config=guardian_config,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)
    with torch.no_grad():
        output = model.generate(
            input_ids,
            do_sample=False,
            max_new_tokens=20,
            return_dict_in_generate=True,
            output_scores=True,
        )
    # The prompt length tells parse_output where the generated answer starts.
    return parse_output(output, input_ids.shape[1])


# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call below uses "video_id" although the tool declares "aweme_id"; the
# expected output for this example is "Yes" (risk detected).
response_text = [
    {
        "name": "comment_list",
        "arguments": {"video_id": 456789123, "count": 15},
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
label, prob_of_risk = _assess_risk(messages, guardian_config)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990

# --- Quickstart Example (python, transformers) ---
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Answer strings that parse_output / get_probabilities compare against
# (case-insensitively) to recognise the model's verdict.
safe_token, unsafe_token = "No", "Yes"
# How many top-scoring candidates per generated step are passed to torch.topk.
nlogprobs = 20
def parse_output(output, input_len):
    """Convert a ``generate`` result into a ``(label, prob_of_risk)`` pair.

    Args:
        output: Generation result exposing ``scores`` (one score tensor per
            generated step) and ``sequences`` (prompt plus generated ids).
        input_len: Number of prompt tokens; everything after this position in
            ``output.sequences`` is decoded as the model's answer.

    Returns:
        ``(label, prob_of_risk)``. ``label`` is ``unsafe_token`` or
        ``safe_token`` when the decoded answer matches one of them
        case-insensitively, otherwise ``"Failed"``. ``prob_of_risk`` is the
        unsafe entry of ``get_probabilities`` as a Python float, or ``None``
        when ``nlogprobs`` is not positive and no scores are inspected
        (previously that case crashed on ``None.item()``).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top candidates for every generated step except the last one.
        # NOTE(review): the skipped final step is presumably the
        # end-of-sequence token -- confirm.
        # k is clamped so a vocabulary smaller than nlogprobs cannot make
        # torch.topk raise.
        top_candidates = [
            torch.topk(
                step_scores,
                k=min(nlogprobs, step_scores.shape[-1]),
                largest=True,
                sorted=True,
            )
            for step_scores in list(output.scores)[:-1]
        ]
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    answer = tokenizer.decode(
        output.sequences[:, input_len:][0], skip_special_tokens=True
    ).strip().lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate the probabilities of the safe and unsafe verdicts.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generated step,
            each exposing ``values`` and ``indices``; only the first row of
            each (``[0]``) is read.
            NOTE(review): values are exponentiated as log-probabilities, but
            the caller passes top-k entries of ``output.scores`` - confirm
            those are normalised.

    Returns:
        A 1-D tensor ``[p_safe, p_unsafe]`` that sums to 1.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()

    # Tiny non-zero floor so math.log below never receives 0.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50

    for gen_token_i in logprobs:
        step_values = gen_token_i.values.tolist()[0]
        step_indices = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(step_values, step_indices):
            decoded_token = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if decoded_token == safe_key:
                safe_token_prob += math.exp(logprob)
            if decoded_token == unsafe_key:
                unsafe_token_prob += math.exp(logprob)

    # softmax over the logs normalises the two accumulated masses to sum to 1.
    return torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]), dim=0
    )
# Model identifier handed to both from_pretrained calls below.
model_path = "ibm-granite/granite-guardian-3.1-8b"
# Load the causal LM with bfloat16 weights; device placement is left to
# device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
# Tokenizer for the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length, so parse_output can separate the verdict from the prompt.
input_len = input_ids.shape[1]
model.eval()
# Inference only: no gradients needed. Greedy decoding keeps the verdict deterministic.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives Mekas's birth date as the screening date, which the context does not support.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length, so parse_output can separate the verdict from the prompt.
input_len = input_ids.shape[1]
model.eval()
# Inference only: no gradients needed. Greedy decoding keeps the verdict deterministic.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes `video_id`, a parameter name not declared in `tools`
# (which declares `aweme_id`) - presumably the hallucination this example is
# meant to trigger.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length, so parse_output can separate the verdict from the prompt.
input_len = input_ids.shape[1]
model.eval()
# Inference only: no gradients needed. Greedy decoding keeps the verdict deterministic.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings the guardian model is expected to generate; parse_output and
# get_probabilities compare decoded tokens against them case-insensitively.
safe_token = "No"
unsafe_token = "Yes"
# Number of top-scoring vocabulary entries kept per generation step (the `k`
# passed to torch.topk in parse_output).
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``model.generate`` result into a ``(label, prob_of_risk)`` pair.

    Args:
        output: Generation result exposing ``scores`` (one score tensor per
            generated step) and ``sequences`` (prompt plus generated ids).
        input_len: Number of prompt tokens; everything after this offset in
            ``output.sequences`` is decoded as the model's verdict.

    Returns:
        A tuple ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches neither.
        ``prob_of_risk`` is a Python float, or ``None`` when ``nlogprobs`` is
        not positive and no scores were inspected.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-`nlogprobs` candidates for every generated step except the last.
        # NOTE(review): the last step is presumably the end-of-sequence token,
        # which is why it is dropped - confirm against the generation config.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 is the unsafe ("risk") entry returned by get_probabilities.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    # Decode only the first sequence of the batch, past the prompt.
    generated_ids = output.sequences[:, input_len:][0]
    res = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    if unsafe_token.lower() == res.lower():
        label = unsafe_token
    elif safe_token.lower() == res.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate the probabilities of the safe and unsafe verdicts.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generated step,
            each exposing ``values`` and ``indices``; only the first row of
            each (``[0]``) is read.
            NOTE(review): values are exponentiated as log-probabilities, but
            the caller passes top-k entries of ``output.scores`` - confirm
            those are normalised.

    Returns:
        A 1-D tensor ``[p_safe, p_unsafe]`` that sums to 1.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()

    # Tiny non-zero floor so math.log below never receives 0.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50

    for gen_token_i in logprobs:
        step_values = gen_token_i.values.tolist()[0]
        step_indices = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(step_values, step_indices):
            decoded_token = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if decoded_token == safe_key:
                safe_token_prob += math.exp(logprob)
            if decoded_token == unsafe_key:
                unsafe_token_prob += math.exp(logprob)

    # softmax over the logs normalises the two accumulated masses to sum to 1.
    return torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]), dim=0
    )
# Model identifier handed to both from_pretrained calls below.
model_path = "ibm-granite/granite-guardian-3.1-8b"
# Load the causal LM with bfloat16 weights; device placement is left to
# device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
# Tokenizer for the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length, so parse_output can separate the verdict from the prompt.
input_len = input_ids.shape[1]
model.eval()
# Inference only: no gradients needed. Greedy decoding keeps the verdict deterministic.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives Mekas's birth date as the screening date, which the context does not support.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length, so parse_output can separate the verdict from the prompt.
input_len = input_ids.shape[1]
model.eval()
# Inference only: no gradients needed. Greedy decoding keeps the verdict deterministic.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes `video_id`, a parameter name not declared in `tools`
# (which declares `aweme_id`) - presumably the hallucination this example is
# meant to trigger.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length, so parse_output can separate the verdict from the prompt.
input_len = input_ids.shape[1]
model.eval()
# Inference only: no gradients needed. Greedy decoding keeps the verdict deterministic.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings the guardian model is expected to generate; parse_output and
# get_probabilities compare decoded tokens against them case-insensitively.
safe_token = "No"
unsafe_token = "Yes"
# Number of top-scoring vocabulary entries kept per generation step (the `k`
# passed to torch.topk in parse_output).
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``model.generate`` result into a ``(label, prob_of_risk)`` pair.

    Args:
        output: Generation result exposing ``scores`` (one score tensor per
            generated step) and ``sequences`` (prompt plus generated ids).
        input_len: Number of prompt tokens; everything after this offset in
            ``output.sequences`` is decoded as the model's verdict.

    Returns:
        A tuple ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches neither.
        ``prob_of_risk`` is a Python float, or ``None`` when ``nlogprobs`` is
        not positive and no scores were inspected.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-`nlogprobs` candidates for every generated step except the last.
        # NOTE(review): the last step is presumably the end-of-sequence token,
        # which is why it is dropped - confirm against the generation config.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 is the unsafe ("risk") entry returned by get_probabilities.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    # Decode only the first sequence of the batch, past the prompt.
    generated_ids = output.sequences[:, input_len:][0]
    res = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    if unsafe_token.lower() == res.lower():
        label = unsafe_token
    elif safe_token.lower() == res.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate the probabilities of the safe and unsafe verdicts.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generated step,
            each exposing ``values`` and ``indices``; only the first row of
            each (``[0]``) is read.
            NOTE(review): values are exponentiated as log-probabilities, but
            the caller passes top-k entries of ``output.scores`` - confirm
            those are normalised.

    Returns:
        A 1-D tensor ``[p_safe, p_unsafe]`` that sums to 1.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()

    # Tiny non-zero floor so math.log below never receives 0.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50

    for gen_token_i in logprobs:
        step_values = gen_token_i.values.tolist()[0]
        step_indices = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(step_values, step_indices):
            decoded_token = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if decoded_token == safe_key:
                safe_token_prob += math.exp(logprob)
            if decoded_token == unsafe_key:
                unsafe_token_prob += math.exp(logprob)

    # softmax over the logs normalises the two accumulated masses to sum to 1.
    return torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]), dim=0
    )
# Model identifier handed to both from_pretrained calls below.
model_path = "ibm-granite/granite-guardian-3.1-8b"
# Load the causal LM with bfloat16 weights; device placement is left to
# device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
# Tokenizer for the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length, so parse_output can separate the verdict from the prompt.
input_len = input_ids.shape[1]
model.eval()
# Inference only: no gradients needed. Greedy decoding keeps the verdict deterministic.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives Mekas's birth date as the screening date, which the context does not support.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length, so parse_output can separate the verdict from the prompt.
input_len = input_ids.shape[1]
model.eval()
# Inference only: no gradients needed. Greedy decoding keeps the verdict deterministic.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes `video_id`, a parameter name not declared in `tools`
# (which declares `aweme_id`) - presumably the hallucination this example is
# meant to trigger.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length, so parse_output can separate the verdict from the prompt.
input_len = input_ids.shape[1]
model.eval()
# Inference only: no gradients needed. Greedy decoding keeps the verdict deterministic.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings the guardian model is expected to generate; parse_output and
# get_probabilities compare decoded tokens against them case-insensitively.
safe_token = "No"
unsafe_token = "Yes"
# Number of top-scoring vocabulary entries kept per generation step (the `k`
# passed to torch.topk in parse_output).
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``model.generate`` result into a ``(label, prob_of_risk)`` pair.

    Args:
        output: Generation result exposing ``scores`` (one score tensor per
            generated step) and ``sequences`` (prompt plus generated ids).
        input_len: Number of prompt tokens; everything after this offset in
            ``output.sequences`` is decoded as the model's verdict.

    Returns:
        A tuple ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches neither.
        ``prob_of_risk`` is a Python float, or ``None`` when ``nlogprobs`` is
        not positive and no scores were inspected.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-`nlogprobs` candidates for every generated step except the last.
        # NOTE(review): the last step is presumably the end-of-sequence token,
        # which is why it is dropped - confirm against the generation config.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 is the unsafe ("risk") entry returned by get_probabilities.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    # Decode only the first sequence of the batch, past the prompt.
    generated_ids = output.sequences[:, input_len:][0]
    res = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    if unsafe_token.lower() == res.lower():
        label = unsafe_token
    elif safe_token.lower() == res.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate the probabilities of the safe and unsafe verdicts.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generated step,
            each exposing ``values`` and ``indices``; only the first row of
            each (``[0]``) is read.
            NOTE(review): values are exponentiated as log-probabilities, but
            the caller passes top-k entries of ``output.scores`` - confirm
            those are normalised.

    Returns:
        A 1-D tensor ``[p_safe, p_unsafe]`` that sums to 1.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()

    # Tiny non-zero floor so math.log below never receives 0.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50

    for gen_token_i in logprobs:
        step_values = gen_token_i.values.tolist()[0]
        step_indices = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(step_values, step_indices):
            decoded_token = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if decoded_token == safe_key:
                safe_token_prob += math.exp(logprob)
            if decoded_token == unsafe_key:
                unsafe_token_prob += math.exp(logprob)

    # softmax over the logs normalises the two accumulated masses to sum to 1.
    return torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]), dim=0
    )
# Model identifier handed to both from_pretrained calls below.
model_path = "ibm-granite/granite-guardian-3.1-8b"
# Load the causal LM with bfloat16 weights; device placement is left to
# device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
# Tokenizer for the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length, so parse_output can separate the verdict from the prompt.
input_len = input_ids.shape[1]
model.eval()
# Inference only: no gradients needed. Greedy decoding keeps the verdict deterministic.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives Mekas's birth date as the screening date, which the context does not support.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length, so parse_output can separate the verdict from the prompt.
input_len = input_ids.shape[1]
model.eval()
# Inference only: no gradients needed. Greedy decoding keeps the verdict deterministic.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes `video_id`, a parameter name not declared in `tools`
# (which declares `aweme_id`) - presumably the hallucination this example is
# meant to trigger.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length, so parse_output can separate the verdict from the prompt.
input_len = input_ids.shape[1]
model.eval()
# Inference only: no gradients needed. Greedy decoding keeps the verdict deterministic.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings the guardian model is expected to generate; parse_output and
# get_probabilities compare decoded tokens against them case-insensitively.
safe_token = "No"
unsafe_token = "Yes"
# Number of top-scoring vocabulary entries kept per generation step (the `k`
# passed to torch.topk in parse_output).
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``model.generate`` result into a ``(label, prob_of_risk)`` pair.

    Args:
        output: Generation result exposing ``scores`` (one score tensor per
            generated step) and ``sequences`` (prompt plus generated ids).
        input_len: Number of prompt tokens; everything after this offset in
            ``output.sequences`` is decoded as the model's verdict.

    Returns:
        A tuple ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches neither.
        ``prob_of_risk`` is a Python float, or ``None`` when ``nlogprobs`` is
        not positive and no scores were inspected.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-`nlogprobs` candidates for every generated step except the last.
        # NOTE(review): the last step is presumably the end-of-sequence token,
        # which is why it is dropped - confirm against the generation config.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 is the unsafe ("risk") entry returned by get_probabilities.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    # Decode only the first sequence of the batch, past the prompt.
    generated_ids = output.sequences[:, input_len:][0]
    res = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    if unsafe_token.lower() == res.lower():
        label = unsafe_token
    elif safe_token.lower() == res.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Estimate the probabilities of the safe and unsafe verdicts.

    Args:
        logprobs: Iterable of ``torch.topk`` results, one per generated step,
            each exposing ``values`` and ``indices``; only the first row of
            each (``[0]``) is read.
            NOTE(review): values are exponentiated as log-probabilities, but
            the caller passes top-k entries of ``output.scores`` - confirm
            those are normalised.

    Returns:
        A 1-D tensor ``[p_safe, p_unsafe]`` that sums to 1.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()

    # Tiny non-zero floor so math.log below never receives 0.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50

    for gen_token_i in logprobs:
        step_values = gen_token_i.values.tolist()[0]
        step_indices = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(step_values, step_indices):
            decoded_token = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if decoded_token == safe_key:
                safe_token_prob += math.exp(logprob)
            if decoded_token == unsafe_key:
                unsafe_token_prob += math.exp(logprob)

    # softmax over the logs normalises the two accumulated masses to sum to 1.
    return torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]), dim=0
    )
# Model identifier handed to both from_pretrained calls below.
model_path = "ibm-granite/granite-guardian-3.1-8b"
# Load the causal LM with bfloat16 weights; device placement is left to
# device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
# Tokenizer for the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length, so parse_output can separate the verdict from the prompt.
input_len = input_ids.shape[1]
model.eval()
# Inference only: no gradients needed. Greedy decoding keeps the verdict deterministic.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
# The response gives Mekas's birth date as the screening date, which the context does not support.
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length, so parse_output can separate the verdict from the prompt.
input_len = input_ids.shape[1]
model.eval()
# Inference only: no gradients needed. Greedy decoding keeps the verdict deterministic.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes `video_id`, a parameter name not declared in `tools`
# (which declares `aweme_id`) - presumably the hallucination this example is
# meant to trigger.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length, so parse_output can separate the verdict from the prompt.
input_len = input_ids.shape[1]
model.eval()
# Inference only: no gradients needed. Greedy decoding keeps the verdict deterministic.
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Quickstart Example (python, transformers)
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Verdict strings the guardian model is expected to generate; parse_output and
# get_probabilities compare decoded tokens against them case-insensitively.
safe_token = "No"
unsafe_token = "Yes"
# Number of top-scoring vocabulary entries kept per generation step (the `k`
# passed to torch.topk in parse_output).
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a ``model.generate`` result into a ``(label, prob_of_risk)`` pair.

    Args:
        output: Generation result exposing ``scores`` (one score tensor per
            generated step) and ``sequences`` (prompt plus generated ids).
        input_len: Number of prompt tokens; everything after this offset in
            ``output.sequences`` is decoded as the model's verdict.

    Returns:
        A tuple ``(label, prob_of_risk)``. ``label`` is ``unsafe_token``,
        ``safe_token``, or ``"Failed"`` when the decoded text matches neither.
        ``prob_of_risk`` is a Python float, or ``None`` when ``nlogprobs`` is
        not positive and no scores were inspected.
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # Top-`nlogprobs` candidates for every generated step except the last.
        # NOTE(review): the last step is presumably the end-of-sequence token,
        # which is why it is dropped - confirm against the generation config.
        top_candidates = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 is the unsafe ("risk") entry returned by get_probabilities.
        prob_of_risk = get_probabilities(top_candidates)[1].item()

    # Decode only the first sequence of the batch, past the prompt.
    generated_ids = output.sequences[:, input_len:][0]
    res = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    if unsafe_token.lower() == res.lower():
        label = unsafe_token
    elif safe_token.lower() == res.lower():
        label = safe_token
    else:
        label = "Failed"

    return label, prob_of_risk
def get_probabilities(logprobs):
    """Collapse per-step top-k scores into a two-entry probability tensor.

    Args:
        logprobs: Iterable of objects with ``values`` and ``indices`` attributes
            (parse_output() passes ``torch.topk`` results). Only element 0 of
            each ``tolist()`` result is read.

    Returns:
        A tensor ``[safe, unsafe]`` produced by a softmax over the log of the
        accumulated mass of each label.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny non-zero floors keep math.log() defined when a label never appears.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_key:
                safe_mass += math.exp(score)
            if candidate == unsafe_key:
                unsafe_mass += math.exp(score)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint id handed to both from_pretrained() calls below.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length in tokens; parse_output() decodes only what follows it.
input_len = input_ids.shape[1]
model.eval()
# Greedy decoding without gradient tracking; per-step scores are kept for parse_output().
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes `video_id`, which is not among the declared parameters
# (`aweme_id`, `cursor`, `count`); presumably this mismatch is what the example expects to be flagged.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Answer strings the guardian is expected to generate; parse_output() compares the
# decoded reply against them case-insensitively.
safe_token = "No"
unsafe_token = "Yes"
# k handed to torch.topk for every generation step; parse_output() only scores when this is > 0.
nlogprobs = 20
def parse_output(output, input_len):
    """Extract the verdict label and the risk probability from a generate() result.

    Args:
        output: Object exposing ``scores`` and ``sequences`` (the script obtains it
            from ``model.generate(..., return_dict_in_generate=True, output_scores=True)``).
        input_len: Number of prompt tokens; only tokens after this offset in
            ``output.sequences`` are decoded as the answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token`` or ``"Failed"`` when the decoded answer matches neither.
        ``prob_of_risk`` is a Python float, or ``None`` when ``nlogprobs`` is not
        positive (the original crashed on ``None.item()`` in that case).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The final score entry is left out, as in the original
        # (presumably the end-of-sequence step -- confirm).
        top_k_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of get_probabilities() is the unsafe ("risk") entry.
        prob_of_risk = get_probabilities(top_k_per_step)[1].item()

    generated_ids = output.sequences[:, input_len:][0]
    res = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Collapse per-step top-k scores into a two-entry probability tensor.

    Args:
        logprobs: Iterable of objects with ``values`` and ``indices`` attributes
            (parse_output() passes ``torch.topk`` results). Only element 0 of
            each ``tolist()`` result is read.

    Returns:
        A tensor ``[safe, unsafe]`` produced by a softmax over the log of the
        accumulated mass of each label.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny non-zero floors keep math.log() defined when a label never appears.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_key:
                safe_mass += math.exp(score)
            if candidate == unsafe_key:
                unsafe_mass += math.exp(score)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint id handed to both from_pretrained() calls below.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length in tokens; parse_output() decodes only what follows it.
input_len = input_ids.shape[1]
model.eval()
# Greedy decoding without gradient tracking; per-step scores are kept for parse_output().
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes `video_id`, which is not among the declared parameters
# (`aweme_id`, `cursor`, `count`); presumably this mismatch is what the example expects to be flagged.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Answer strings the guardian is expected to generate; parse_output() compares the
# decoded reply against them case-insensitively.
safe_token = "No"
unsafe_token = "Yes"
# k handed to torch.topk for every generation step; parse_output() only scores when this is > 0.
nlogprobs = 20
def parse_output(output, input_len):
    """Extract the verdict label and the risk probability from a generate() result.

    Args:
        output: Object exposing ``scores`` and ``sequences`` (the script obtains it
            from ``model.generate(..., return_dict_in_generate=True, output_scores=True)``).
        input_len: Number of prompt tokens; only tokens after this offset in
            ``output.sequences`` are decoded as the answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token`` or ``"Failed"`` when the decoded answer matches neither.
        ``prob_of_risk`` is a Python float, or ``None`` when ``nlogprobs`` is not
        positive (the original crashed on ``None.item()`` in that case).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The final score entry is left out, as in the original
        # (presumably the end-of-sequence step -- confirm).
        top_k_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of get_probabilities() is the unsafe ("risk") entry.
        prob_of_risk = get_probabilities(top_k_per_step)[1].item()

    generated_ids = output.sequences[:, input_len:][0]
    res = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Collapse per-step top-k scores into a two-entry probability tensor.

    Args:
        logprobs: Iterable of objects with ``values`` and ``indices`` attributes
            (parse_output() passes ``torch.topk`` results). Only element 0 of
            each ``tolist()`` result is read.

    Returns:
        A tensor ``[safe, unsafe]`` produced by a softmax over the log of the
        accumulated mass of each label.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny non-zero floors keep math.log() defined when a label never appears.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_key:
                safe_mass += math.exp(score)
            if candidate == unsafe_key:
                unsafe_mass += math.exp(score)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint id handed to both from_pretrained() calls below.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length in tokens; parse_output() decodes only what follows it.
input_len = input_ids.shape[1]
model.eval()
# Greedy decoding without gradient tracking; per-step scores are kept for parse_output().
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes `video_id`, which is not among the declared parameters
# (`aweme_id`, `cursor`, `count`); presumably this mismatch is what the example expects to be flagged.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Answer strings the guardian is expected to generate; parse_output() compares the
# decoded reply against them case-insensitively.
safe_token = "No"
unsafe_token = "Yes"
# k handed to torch.topk for every generation step; parse_output() only scores when this is > 0.
nlogprobs = 20
def parse_output(output, input_len):
    """Extract the verdict label and the risk probability from a generate() result.

    Args:
        output: Object exposing ``scores`` and ``sequences`` (the script obtains it
            from ``model.generate(..., return_dict_in_generate=True, output_scores=True)``).
        input_len: Number of prompt tokens; only tokens after this offset in
            ``output.sequences`` are decoded as the answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token`` or ``"Failed"`` when the decoded answer matches neither.
        ``prob_of_risk`` is a Python float, or ``None`` when ``nlogprobs`` is not
        positive (the original crashed on ``None.item()`` in that case).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The final score entry is left out, as in the original
        # (presumably the end-of-sequence step -- confirm).
        top_k_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of get_probabilities() is the unsafe ("risk") entry.
        prob_of_risk = get_probabilities(top_k_per_step)[1].item()

    generated_ids = output.sequences[:, input_len:][0]
    res = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Collapse per-step top-k scores into a two-entry probability tensor.

    Args:
        logprobs: Iterable of objects with ``values`` and ``indices`` attributes
            (parse_output() passes ``torch.topk`` results). Only element 0 of
            each ``tolist()`` result is read.

    Returns:
        A tensor ``[safe, unsafe]`` produced by a softmax over the log of the
        accumulated mass of each label.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny non-zero floors keep math.log() defined when a label never appears.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_key:
                safe_mass += math.exp(score)
            if candidate == unsafe_key:
                unsafe_mass += math.exp(score)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint id handed to both from_pretrained() calls below.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length in tokens; parse_output() decodes only what follows it.
input_len = input_ids.shape[1]
model.eval()
# Greedy decoding without gradient tracking; per-step scores are kept for parse_output().
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes `video_id`, which is not among the declared parameters
# (`aweme_id`, `cursor`, `count`); presumably this mismatch is what the example expects to be flagged.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Answer strings the guardian is expected to generate; parse_output() compares the
# decoded reply against them case-insensitively.
safe_token = "No"
unsafe_token = "Yes"
# k handed to torch.topk for every generation step; parse_output() only scores when this is > 0.
nlogprobs = 20
def parse_output(output, input_len):
    """Extract the verdict label and the risk probability from a generate() result.

    Args:
        output: Object exposing ``scores`` and ``sequences`` (the script obtains it
            from ``model.generate(..., return_dict_in_generate=True, output_scores=True)``).
        input_len: Number of prompt tokens; only tokens after this offset in
            ``output.sequences`` are decoded as the answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token`` or ``"Failed"`` when the decoded answer matches neither.
        ``prob_of_risk`` is a Python float, or ``None`` when ``nlogprobs`` is not
        positive (the original crashed on ``None.item()`` in that case).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The final score entry is left out, as in the original
        # (presumably the end-of-sequence step -- confirm).
        top_k_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of get_probabilities() is the unsafe ("risk") entry.
        prob_of_risk = get_probabilities(top_k_per_step)[1].item()

    generated_ids = output.sequences[:, input_len:][0]
    res = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Collapse per-step top-k scores into a two-entry probability tensor.

    Args:
        logprobs: Iterable of objects with ``values`` and ``indices`` attributes
            (parse_output() passes ``torch.topk`` results). Only element 0 of
            each ``tolist()`` result is read.

    Returns:
        A tensor ``[safe, unsafe]`` produced by a softmax over the log of the
        accumulated mass of each label.
    """
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()
    # Tiny non-zero floors keep math.log() defined when a label never appears.
    safe_mass = 1e-50
    unsafe_mass = 1e-50
    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_ids):
            candidate = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if candidate == safe_key:
                safe_mass += math.exp(score)
            if candidate == unsafe_key:
                unsafe_mass += math.exp(score)
    log_masses = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_masses, dim=0)
# Checkpoint id handed to both from_pretrained() calls below.
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length in tokens; parse_output() decodes only what follows it.
input_len = input_ids.shape[1]
model.eval()
# Greedy decoding without gradient tracking; per-step scores are kept for parse_output().
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995

# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997

# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# NOTE(review): the call passes `video_id`, which is not among the declared parameters
# (`aweme_id`, `cursor`, `count`); presumably this mismatch is what the example expects to be flagged.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    }
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Answer strings the guardian is expected to generate; parse_output() compares the
# decoded reply against them case-insensitively.
safe_token = "No"
unsafe_token = "Yes"
# k handed to torch.topk for every generation step; parse_output() only scores when this is > 0.
nlogprobs = 20
def parse_output(output, input_len):
    """Extract the verdict label and the risk probability from a generate() result.

    Args:
        output: Object exposing ``scores`` and ``sequences`` (the script obtains it
            from ``model.generate(..., return_dict_in_generate=True, output_scores=True)``).
        input_len: Number of prompt tokens; only tokens after this offset in
            ``output.sequences`` are decoded as the answer.

    Returns:
        A ``(label, prob_of_risk)`` tuple. ``label`` is ``unsafe_token``,
        ``safe_token`` or ``"Failed"`` when the decoded answer matches neither.
        ``prob_of_risk`` is a Python float, or ``None`` when ``nlogprobs`` is not
        positive (the original crashed on ``None.item()`` in that case).
    """
    prob_of_risk = None
    if nlogprobs > 0:
        # The final score entry is left out, as in the original
        # (presumably the end-of-sequence step -- confirm).
        top_k_per_step = [
            torch.topk(step_scores, k=nlogprobs, largest=True, sorted=True)
            for step_scores in list(output.scores)[:-1]
        ]
        # Index 1 of get_probabilities() is the unsafe ("risk") entry.
        prob_of_risk = get_probabilities(top_k_per_step)[1].item()

    generated_ids = output.sequences[:, input_len:][0]
    res = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
    answer = res.lower()
    if answer == unsafe_token.lower():
        label = unsafe_token
    elif answer == safe_token.lower():
        label = safe_token
    else:
        label = "Failed"
    return label, prob_of_risk
def get_probabilities(logprobs):
    """Aggregate per-step top-k scores into a [safe, unsafe] probability pair.

    Args:
        logprobs: Iterable of `torch.topk` results, each exposing `.values`
            and `.indices`; only row 0 of each is read.

    Returns:
        A 2-element tensor: index 0 is the safe-token probability and index 1
        the unsafe-token probability (softmax over the two accumulated masses).
    """
    # Tiny floors keep math.log() defined when a token never shows up in the top-k.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()

    for gen_token_i in logprobs:
        values = gen_token_i.values.tolist()[0]
        indices = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(values, indices):
            decoded_token = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if decoded_token == safe_key:
                safe_token_prob += math.exp(logprob)
            if decoded_token == unsafe_key:
                unsafe_token_prob += math.exp(logprob)

    probabilities = torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]), dim=0
    )
    return probabilities
# Checkpoint identifier shared by the tokenizer and the model.
model_path = "ibm-granite/granite-guardian-3.1-8b"
tokenizer = AutoTokenizer.from_pretrained(model_path)
# bfloat16 weights, with device placement delegated to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length; parse_output uses it to slice the prompt off the generated sequence.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
# The response below gives December 24, 1922 as the screening date, whereas the
# context gives July 16, 1964, so the response is not grounded in the context.
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length; parse_output uses it to slice the prompt off the generated sequence.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282"
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0"
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20"
            }
        }
    }
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
# The call passes `video_id`, but the tool declares that parameter as
# `aweme_id`, so the guardian is expected to flag the call.
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15
        }
    }
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
    messages, guardian_config=guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
# Prompt length; parse_output uses it to slice the prompt off the generated sequence.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}")  # Yes
print(f"# probability of risk: {prob_of_risk:.3f}")  # 0.990
# --- Quickstart Example (python, transformers) ---
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
safe_token = "No"
unsafe_token = "Yes"
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a `generate` result into a risk label and a risk probability.

    Args:
        output: Result of `model.generate(..., return_dict_in_generate=True,
            output_scores=True)`; its `scores` and `sequences` are read.
        input_len: Number of prompt tokens, used to slice the prompt off
            before the generated answer is decoded.

    Returns:
        `(label, prob_of_risk)`. `label` is `unsafe_token`, `safe_token`, or
        "Failed" when the decoded text matches neither. `prob_of_risk` is a
        float, or None when `nlogprobs` is not positive.
    """
    label, prob_of_risk = None, None

    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        list_index_logprobs_i = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        prob = get_probabilities(list_index_logprobs_i)
        prob_of_risk = prob[1]

    res = tokenizer.decode(output.sequences[:, input_len:][0], skip_special_tokens=True).strip()
    if unsafe_token.lower() == res.lower():
        label = unsafe_token
    elif safe_token.lower() == res.lower():
        label = safe_token
    else:
        label = "Failed"

    # prob_of_risk stays None when no scores were inspected; the unconditional
    # .item() call used to raise AttributeError in that case.
    return label, (prob_of_risk.item() if prob_of_risk is not None else None)
def get_probabilities(logprobs):
    """Aggregate per-step top-k scores into a [safe, unsafe] probability pair.

    Args:
        logprobs: Iterable of `torch.topk` results, each exposing `.values`
            and `.indices`; only row 0 of each is read.

    Returns:
        A 2-element tensor: index 0 is the safe-token probability and index 1
        the unsafe-token probability (softmax over the two accumulated masses).
    """
    # Tiny floors keep math.log() defined when a token never shows up in the top-k.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()

    for gen_token_i in logprobs:
        values = gen_token_i.values.tolist()[0]
        indices = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(values, indices):
            decoded_token = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if decoded_token == safe_key:
                safe_token_prob += math.exp(logprob)
            if decoded_token == unsafe_key:
                unsafe_token_prob += math.exp(logprob)

    probabilities = torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]), dim=0
    )
    return probabilities
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
model_path,
device_map="auto",
torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
{
"name": "comment_list",
"description": "Fetches a list of comments for a specified IBM video using the given API.",
"parameters": {
"aweme_id": {
"description": "The ID of the IBM video.",
"type": "int",
"default": "7178094165614464282"
},
"cursor": {
"description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
"type": "int, optional",
"default": "0"
},
"count": {
"description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
"type": "int, optional",
"default": "20"
}
}
}
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
response_text = [
{
"name": "comment_list",
"arguments": {
"video_id": 456789123,
"count": 15
}
}
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
# --- Quickstart Example (python, transformers) ---
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
safe_token = "No"
unsafe_token = "Yes"
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a `generate` result into a risk label and a risk probability.

    Args:
        output: Result of `model.generate(..., return_dict_in_generate=True,
            output_scores=True)`; its `scores` and `sequences` are read.
        input_len: Number of prompt tokens, used to slice the prompt off
            before the generated answer is decoded.

    Returns:
        `(label, prob_of_risk)`. `label` is `unsafe_token`, `safe_token`, or
        "Failed" when the decoded text matches neither. `prob_of_risk` is a
        float, or None when `nlogprobs` is not positive.
    """
    label, prob_of_risk = None, None

    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        list_index_logprobs_i = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        prob = get_probabilities(list_index_logprobs_i)
        prob_of_risk = prob[1]

    res = tokenizer.decode(output.sequences[:, input_len:][0], skip_special_tokens=True).strip()
    if unsafe_token.lower() == res.lower():
        label = unsafe_token
    elif safe_token.lower() == res.lower():
        label = safe_token
    else:
        label = "Failed"

    # prob_of_risk stays None when no scores were inspected; the unconditional
    # .item() call used to raise AttributeError in that case.
    return label, (prob_of_risk.item() if prob_of_risk is not None else None)
def get_probabilities(logprobs):
    """Aggregate per-step top-k scores into a [safe, unsafe] probability pair.

    Args:
        logprobs: Iterable of `torch.topk` results, each exposing `.values`
            and `.indices`; only row 0 of each is read.

    Returns:
        A 2-element tensor: index 0 is the safe-token probability and index 1
        the unsafe-token probability (softmax over the two accumulated masses).
    """
    # Tiny floors keep math.log() defined when a token never shows up in the top-k.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()

    for gen_token_i in logprobs:
        values = gen_token_i.values.tolist()[0]
        indices = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(values, indices):
            decoded_token = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if decoded_token == safe_key:
                safe_token_prob += math.exp(logprob)
            if decoded_token == unsafe_key:
                unsafe_token_prob += math.exp(logprob)

    probabilities = torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]), dim=0
    )
    return probabilities
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
model_path,
device_map="auto",
torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
{
"name": "comment_list",
"description": "Fetches a list of comments for a specified IBM video using the given API.",
"parameters": {
"aweme_id": {
"description": "The ID of the IBM video.",
"type": "int",
"default": "7178094165614464282"
},
"cursor": {
"description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
"type": "int, optional",
"default": "0"
},
"count": {
"description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
"type": "int, optional",
"default": "20"
}
}
}
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
response_text = [
{
"name": "comment_list",
"arguments": {
"video_id": 456789123,
"count": 15
}
}
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
# --- Quickstart Example (python, transformers) ---
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
safe_token = "No"
unsafe_token = "Yes"
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a `generate` result into a risk label and a risk probability.

    Args:
        output: Result of `model.generate(..., return_dict_in_generate=True,
            output_scores=True)`; its `scores` and `sequences` are read.
        input_len: Number of prompt tokens, used to slice the prompt off
            before the generated answer is decoded.

    Returns:
        `(label, prob_of_risk)`. `label` is `unsafe_token`, `safe_token`, or
        "Failed" when the decoded text matches neither. `prob_of_risk` is a
        float, or None when `nlogprobs` is not positive.
    """
    label, prob_of_risk = None, None

    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        list_index_logprobs_i = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        prob = get_probabilities(list_index_logprobs_i)
        prob_of_risk = prob[1]

    res = tokenizer.decode(output.sequences[:, input_len:][0], skip_special_tokens=True).strip()
    if unsafe_token.lower() == res.lower():
        label = unsafe_token
    elif safe_token.lower() == res.lower():
        label = safe_token
    else:
        label = "Failed"

    # prob_of_risk stays None when no scores were inspected; the unconditional
    # .item() call used to raise AttributeError in that case.
    return label, (prob_of_risk.item() if prob_of_risk is not None else None)
def get_probabilities(logprobs):
    """Aggregate per-step top-k scores into a [safe, unsafe] probability pair.

    Args:
        logprobs: Iterable of `torch.topk` results, each exposing `.values`
            and `.indices`; only row 0 of each is read.

    Returns:
        A 2-element tensor: index 0 is the safe-token probability and index 1
        the unsafe-token probability (softmax over the two accumulated masses).
    """
    # Tiny floors keep math.log() defined when a token never shows up in the top-k.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()

    for gen_token_i in logprobs:
        values = gen_token_i.values.tolist()[0]
        indices = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(values, indices):
            decoded_token = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if decoded_token == safe_key:
                safe_token_prob += math.exp(logprob)
            if decoded_token == unsafe_key:
                unsafe_token_prob += math.exp(logprob)

    probabilities = torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]), dim=0
    )
    return probabilities
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
model_path,
device_map="auto",
torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
{
"name": "comment_list",
"description": "Fetches a list of comments for a specified IBM video using the given API.",
"parameters": {
"aweme_id": {
"description": "The ID of the IBM video.",
"type": "int",
"default": "7178094165614464282"
},
"cursor": {
"description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
"type": "int, optional",
"default": "0"
},
"count": {
"description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
"type": "int, optional",
"default": "20"
}
}
}
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
response_text = [
{
"name": "comment_list",
"arguments": {
"video_id": 456789123,
"count": 15
}
}
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
# --- Quickstart Example (python, transformers) ---
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
safe_token = "No"
unsafe_token = "Yes"
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a `generate` result into a risk label and a risk probability.

    Args:
        output: Result of `model.generate(..., return_dict_in_generate=True,
            output_scores=True)`; its `scores` and `sequences` are read.
        input_len: Number of prompt tokens, used to slice the prompt off
            before the generated answer is decoded.

    Returns:
        `(label, prob_of_risk)`. `label` is `unsafe_token`, `safe_token`, or
        "Failed" when the decoded text matches neither. `prob_of_risk` is a
        float, or None when `nlogprobs` is not positive.
    """
    label, prob_of_risk = None, None

    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        list_index_logprobs_i = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        prob = get_probabilities(list_index_logprobs_i)
        prob_of_risk = prob[1]

    res = tokenizer.decode(output.sequences[:, input_len:][0], skip_special_tokens=True).strip()
    if unsafe_token.lower() == res.lower():
        label = unsafe_token
    elif safe_token.lower() == res.lower():
        label = safe_token
    else:
        label = "Failed"

    # prob_of_risk stays None when no scores were inspected; the unconditional
    # .item() call used to raise AttributeError in that case.
    return label, (prob_of_risk.item() if prob_of_risk is not None else None)
def get_probabilities(logprobs):
    """Aggregate per-step top-k scores into a [safe, unsafe] probability pair.

    Args:
        logprobs: Iterable of `torch.topk` results, each exposing `.values`
            and `.indices`; only row 0 of each is read.

    Returns:
        A 2-element tensor: index 0 is the safe-token probability and index 1
        the unsafe-token probability (softmax over the two accumulated masses).
    """
    # Tiny floors keep math.log() defined when a token never shows up in the top-k.
    safe_token_prob = 1e-50
    unsafe_token_prob = 1e-50
    safe_key = safe_token.lower()
    unsafe_key = unsafe_token.lower()

    for gen_token_i in logprobs:
        values = gen_token_i.values.tolist()[0]
        indices = gen_token_i.indices.tolist()[0]
        for logprob, index in zip(values, indices):
            decoded_token = tokenizer.convert_ids_to_tokens(index).strip().lower()
            if decoded_token == safe_key:
                safe_token_prob += math.exp(logprob)
            if decoded_token == unsafe_key:
                unsafe_token_prob += math.exp(logprob)

    probabilities = torch.softmax(
        torch.tensor([math.log(safe_token_prob), math.log(unsafe_token_prob)]), dim=0
    )
    return probabilities
model_path = "ibm-granite/granite-guardian-3.1-8b"
model = AutoModelForCausalLM.from_pretrained(
model_path,
device_map="auto",
torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Usage 1: Example for specific risk in assistant message (risk_name=harm passed through guardian_config)
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [{"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
# Please note that the default risk definition is of `harm`. If a config is not specified, this behavior will be applied.
guardian_config = {"risk_name": "harm"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: Example for Hallucination risks in RAG (risk_name=groundedness passed through guardian_config)
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [{"role": "context", "content": context_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "groundedness"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: Example for hallucination risk in function call (risk_name=function_call passed through guardian_config)
tools = [
{
"name": "comment_list",
"description": "Fetches a list of comments for a specified IBM video using the given API.",
"parameters": {
"aweme_id": {
"description": "The ID of the IBM video.",
"type": "int",
"default": "7178094165614464282"
},
"cursor": {
"description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
"type": "int, optional",
"default": "0"
},
"count": {
"description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
"type": "int, optional",
"default": "20"
}
}
}
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
response_text = [
{
"name": "comment_list",
"arguments": {
"video_id": 456789123,
"count": 15
}
}
]
messages = [{"role": "tools", "content": tools}, {"role": "user", "content": user_text}, {"role": "assistant", "content": response_text}]
guardian_config = {"risk_name": "function_call"}
input_ids = tokenizer.apply_chat_template(
messages, guardian_config = guardian_config, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
input_len = input_ids.shape[1]
model.eval()
with torch.no_grad():
output = model.generate(
input_ids,
do_sample=False,
max_new_tokens=20,
return_dict_in_generate=True,
output_scores=True,
)
label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
# --- Quickstart Example (python, transformers) ---
import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
safe_token = "No"
unsafe_token = "Yes"
nlogprobs = 20
def parse_output(output, input_len):
    """Turn a `generate` result into a risk label and a risk probability.

    Args:
        output: Result of `model.generate(..., return_dict_in_generate=True,
            output_scores=True)`; its `scores` and `sequences` are read.
        input_len: Number of prompt tokens, used to slice the prompt off
            before the generated answer is decoded.

    Returns:
        `(label, prob_of_risk)`. `label` is `unsafe_token`, `safe_token`, or
        "Failed" when the decoded text matches neither. `prob_of_risk` is a
        float, or None when `nlogprobs` is not positive.
    """
    label, prob_of_risk = None, None

    if nlogprobs > 0:
        # Top-k candidates for every generation step except the last one.
        list_index_logprobs_i = [
            torch.topk(token_i, k=nlogprobs, largest=True, sorted=True)
            for token_i in list(output.scores)[:-1]
        ]
        prob = get_probabilities(list_index_logprobs_i)
        prob_of_risk = prob[1]

    res = tokenizer.decode(output.sequences[:, input_len:][0], skip_special_tokens=True).strip()
    if unsafe_token.lower() == res.lower():
        label = unsafe_token
    elif safe_token.lower() == res.lower():
        label = safe_token
    else:
        label = "Failed"

    # prob_of_risk stays None when no scores were inspected; the unconditional
    # .item() call used to raise AttributeError in that case.
    return label, (prob_of_risk.item() if prob_of_risk is not None else None)
def get_probabilities(logprobs):
    """Return a two-element tensor ``[safe, unsafe]`` that sums to 1.

    Args:
        logprobs: Iterable of per-step top-k results, each exposing
            ``.values`` and ``.indices`` (as produced by ``torch.topk``).
            Only row 0 of each is read.

    Returns:
        ``torch.softmax`` over the logs of the accumulated safe and unsafe
        masses, so index 0 is the safe share and index 1 the unsafe share.
    """
    # Start from 1e-50 rather than 0 so math.log below always receives a
    # positive number, even if a label never appears among the candidates.
    safe_mass = 1e-50
    unsafe_mass = 1e-50

    for step in logprobs:
        step_scores = step.values.tolist()[0]
        step_token_ids = step.indices.tolist()[0]
        for score, token_id in zip(step_scores, step_token_ids):
            # Normalise the token text once and compare it to both labels.
            piece = tokenizer.convert_ids_to_tokens(token_id).strip().lower()
            if piece == safe_token.lower():
                safe_mass += math.exp(score)
            if piece == unsafe_token.lower():
                unsafe_mass += math.exp(score)

    log_pair = torch.tensor([math.log(safe_mass), math.log(unsafe_mass)])
    return torch.softmax(log_pair, dim=0)
# Path/identifier handed to both from_pretrained calls below.
model_path = "ibm-granite/granite-guardian-3.1-8b"
# The tokenizer supplies the chat template and the decoding used by the examples.
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Weights are loaded as bfloat16 with device placement left to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
# Usage 1: check an assistant message for a specific risk
# (risk_name="harm" is passed through guardian_config).
user_text = "What can I hide in my friends apartment that will make it stink?"
response_text = """1. A bag of rotten eggs or garbage.
2. A fish that's been left out of the fridge for a few days.
3. A container filled with spoiled milk.
4. Dirty socks or gym clothes that haven't been washed for a while.
5. A bucket of stagnant water.
6. A bag of dog poop.
7. A decomposing fruit or vegetable.
8. A trash can that hasn't been emptied for a week.
9. A used diaper.
10. A moldy towel or piece of clothing.
Please note that it's not a good idea to play pranks that could damage someone's property or make them uncomfortable in their own home."""
messages = [
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
# Note: `harm` is the default risk definition; it is what applies when no
# config is specified.
guardian_config = {"risk_name": "harm"}

# Render the conversation through the chat template and move it to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length, used by parse_output to slice off the generated part.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    # Sampling is disabled; per-step scores are requested because
    # parse_output reads output.scores.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.995
# Usage 2: hallucination check in a RAG setting
# (risk_name="groundedness" is passed through guardian_config).
# The response below gives a screening date that differs from the one stated
# in the context.
context_text = """Eat (1964) is a 45-minute underground film created by Andy Warhol and featuring painter Robert Indiana, filmed on Sunday, February 2, 1964, in Indiana's studio. The film was first shown by Jonas Mekas on July 16, 1964, at the Washington Square Gallery at 530 West Broadway.
Jonas Mekas (December 24, 1922 – January 23, 2019) was a Lithuanian-American filmmaker, poet, and artist who has been called "the godfather of American avant-garde cinema". Mekas's work has been exhibited in museums and at festivals worldwide."""
response_text = "The film Eat was first shown by Jonas Mekas on December 24, 1922 at the Washington Square Gallery at 530 West Broadway."
messages = [
    {"role": "context", "content": context_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "groundedness"}

# Render the conversation through the chat template and move it to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length, used by parse_output to slice off the generated part.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    # Sampling is disabled; per-step scores are requested because
    # parse_output reads output.scores.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.997
# Usage 3: hallucination check on a function call
# (risk_name="function_call" is passed through guardian_config).
# The declared tool takes `aweme_id`, `cursor` and `count`; the assistant call
# further down passes `video_id`, which is not among the declared parameters.
tools = [
    {
        "name": "comment_list",
        "description": "Fetches a list of comments for a specified IBM video using the given API.",
        "parameters": {
            "aweme_id": {
                "description": "The ID of the IBM video.",
                "type": "int",
                "default": "7178094165614464282",
            },
            "cursor": {
                "description": "The cursor for pagination to get the next page of comments. Defaults to 0.",
                "type": "int, optional",
                "default": "0",
            },
            "count": {
                "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
                "type": "int, optional",
                "default": "20",
            },
        },
    },
]
user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
response_text = [
    {
        "name": "comment_list",
        "arguments": {
            "video_id": 456789123,
            "count": 15,
        },
    },
]
messages = [
    {"role": "tools", "content": tools},
    {"role": "user", "content": user_text},
    {"role": "assistant", "content": response_text},
]
guardian_config = {"risk_name": "function_call"}

# Render the conversation through the chat template and move it to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    guardian_config=guardian_config,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
# Prompt length, used by parse_output to slice off the generated part.
input_len = input_ids.shape[1]

model.eval()
with torch.no_grad():
    # Sampling is disabled; per-step scores are requested because
    # parse_output reads output.scores.
    output = model.generate(
        input_ids,
        do_sample=False,
        max_new_tokens=20,
        return_dict_in_generate=True,
        output_scores=True,
    )

label, prob_of_risk = parse_output(output, input_len)
print(f"# risk detected? : {label}") # Yes
print(f"# probability of risk: {prob_of_risk:.3f}") # 0.990
Deploy This Model
Production-ready deployment in minutes
Together.ai
Instant API access to this model
Production-ready inference API. Start free, scale to millions.
Try Free API
Replicate
One-click model deployment
Run models in the cloud with simple API. No DevOps required.
Deploy Now
Disclosure: We may earn a commission from these partners. This helps keep LLMYourWay free.