Granite 3.3 8B LoRA Math PRM

8.0B params · 1 language · License: Apache 2.0
by ibm-granite
Language Model · New · Early-stage · 0 downloads
Edge AI: Mobile · Laptop · Server (18GB+ RAM)
Quick Summary

A LoRA-based process reward model (PRM) built on Granite 3.3 8B for mathematical reasoning: given a question and a step-by-step solution, it scores the probability that each intermediate step is correct (see the code example below).

Device Compatibility

Mobile: 4-6GB RAM
Laptop: 16GB RAM
Server: GPU
Minimum recommended: 8GB+ RAM
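
The figures above are indicative for an 8B-parameter model. One common way to reduce the memory footprint (a sketch using standard transformers loading options, not something taken from this model card) is to load the weights in bfloat16, which roughly halves the requirement relative to float32:

import torch
from transformers import AutoModelForCausalLM

# Assumption: loading the PRM in bfloat16 to cut weight memory roughly in half
model = AutoModelForCausalLM.from_pretrained(
    "ibm-granite/granite-3.3-8b-lora-math-prm",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)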

Code Examples

Get step positions and rewards (Python, transformers)

The snippet below formats the question and the candidate solution steps as a multi-turn chat, locates the positions of the assistant "Y" turns, and reads the probability the model assigns to "Y" after each step as that step's reward.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content': query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content': step + " " + generation_prompt}

        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})

    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt=False, tokenize=False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # tokenize a full assistant turn that answers with the correct token, e.g.
    # "<|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>", and keep the tokens
    # that come before the correct token so assistant turns can be matched below
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens=False, return_tensors="pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # keep only the sequence positions where correct_token_id appears as the assistant
    # turn's "Y", not just any occurrence of the same token id elsewhere in the prompt
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token):t_idx].tolist() == asst_toks_before_correct_token:
            # the logits at position i predict token i+1, so the score for the "Y" at
            # position t_idx is read from the logits of the PREVIOUS position
            prm_indices.append(t_idx - 1)

    assert len(prm_indices) > 0, "no assistant step markers found in the input"
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

# convert the logits at each step position to probabilities; the probability of the
# correct token ("Y") is taken as the reward for that step
step_rewards = []
for prm_idx in prm_indices:
    probs = torch.softmax(logits[0, prm_idx, :], dim=-1)
    step_rewards.append(probs[correct_token_id].item())

print(step_rewards)
# [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
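
In the example output, the low score on the sixth step (≈0.52) flags the incorrect algebra in that step of the sampled solution. When the PRM is used to rerank several candidate solutions (for example in a best-of-N setup), the per-step rewards are usually collapsed into a single trajectory score. The aggregation below (minimum over steps, with product as an alternative) is a common convention rather than something prescribed by this model card, and the function name is illustrative:

def aggregate_step_rewards(step_rewards, how="min"):
    # Collapse per-step PRM scores into one trajectory-level score.
    # "min" lets a single bad step dominate; "prod" compounds uncertainty across steps.
    if how == "min":
        return min(step_rewards)
    if how == "prod":
        score = 1.0
        for r in step_rewards:
            score *= r
        return score
    raise ValueError(f"unknown aggregation: {how}")

print(aggregate_step_rewards(step_rewards))          # ~0.52, dominated by the flawed sixth step
print(aggregate_step_rewards(step_rewards, "prod"))  # ~0.49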
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
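
The rewards above score each reasoning step independently. To rank whole candidate solutions (for example, in a best-of-N setup) you need a single score per response; the model card does not fix an aggregation rule, so the snippet below is only a minimal sketch of some common choices, with the minimum-step reward as the default.

# minimal sketch (assumption: the aggregation strategy is up to the user; it is not prescribed by the model card)
def aggregate_step_rewards(step_rewards, method="min"):
    """Collapse per-step rewards into one solution-level score."""
    if method == "min":
        # a chain of reasoning is only as reliable as its weakest step
        return min(step_rewards)
    if method == "mean":
        return sum(step_rewards) / len(step_rewards)
    if method == "prod":
        score = 1.0
        for r in step_rewards:
            score *= r
        return score
    raise ValueError(f"unknown aggregation method: {method}")

solution_score = aggregate_step_rewards(step_rewards, method="min")
print(solution_score)
# with the rewards printed above this is ~0.52, dominated by the questionable sixth step

Min-aggregation lets a single weak step cap the whole solution score, which is usually the desired behaviour when reranking sampled solutions; mean or product aggregation are softer alternatives.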
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

# convert the logits at each step position to probabilities; the probability of the correct token is that step's reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
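
When comparing several candidate solutions (for example, in best-of-n sampling), the per-step rewards are usually collapsed into a single response-level score. The helper below is a minimal, hypothetical sketch, not part of the model card: it assumes the step_rewards list produced above and shows a few common aggregation choices.

from typing import List

def aggregate_step_rewards(step_rewards: List[float], method: str = "min") -> float:
    # Illustrative helper (assumption, not an API of this model): collapse
    # per-step rewards into one response-level score.
    if method == "min":
        # score the response by its weakest step
        return min(step_rewards)
    if method == "prod":
        # multiply step probabilities; penalizes chains with several shaky steps
        score = 1.0
        for r in step_rewards:
            score *= r
        return score
    if method == "last":
        # use only the final step's reward
        return step_rewards[-1]
    raise ValueError(f"unknown aggregation method: {method}")

print(aggregate_step_rewards(step_rewards, method="min"))
# with the rewards printed above, this is ~0.5195 (the weakest step, step 6)

With "min" aggregation, the example response is scored by its sixth step, which the PRM flags as the least reliable one.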
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
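
The per-step rewards are typically collapsed into a single solution-level score when reranking candidate solutions (e.g. best-of-N sampling). The snippet below is a minimal sketch that reuses the step_rewards list computed above; the min / product / last-step aggregations are common PRM conventions shown for illustration and are not prescribed by this model card.

# aggregate step rewards into one score per solution (illustrative choices, not mandated by the model)
from math import prod

min_reward = min(step_rewards)      # a single weak step caps the whole solution
prod_reward = prod(step_rewards)    # treats step rewards like independent probabilities
last_reward = step_rewards[-1]      # reward of the final step only

print(f"min: {min_reward:.4f}, product: {prod_reward:.4f}, last: {last_reward:.4f}")
# with the rewards printed above, min is ~0.5195, flagging the sixth step ("Adding -$0.85 ...") as the weakest link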
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
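The printed values are per-step rewards: for each reasoning step, the probability the PRM assigns to the "Y" token, i.e. its confidence that the solution is still correct up to that step (note the dip to roughly 0.52 at the sixth step). As a follow-up sketch that is not part of the official example, the helper below shows one common way to collapse these step rewards into a single response-level score for best-of-N reranking; the aggregation choices (min, product, mean) and the name aggregate_step_rewards are assumptions for illustration only.

from typing import List

def aggregate_step_rewards(step_rewards: List[float], method: str = "min") -> float:
    # Hypothetical helper (not provided by the model): collapse per-step PRM
    # rewards into one score per candidate response.
    if method == "min":
        # score the response by its weakest step
        return min(step_rewards)
    if method == "prod":
        # treat step rewards as independent probabilities and multiply them
        score = 1.0
        for r in step_rewards:
            score *= r
        return score
    if method == "mean":
        return sum(step_rewards) / len(step_rewards)
    raise ValueError(f"unknown aggregation method: {method}")

# Using the step_rewards computed in the example above: the low sixth-step
# score (~0.52) dominates the "min" aggregate, flagging that step as the
# least trusted part of the solution.
print(aggregate_step_rewards(step_rewards, method="min"))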
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
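
Each value in step_rewards is the model's confidence that the corresponding step of the response is correct so far. In the example above, the sixth step receives only about 0.52: adding $0.85 to both sides of $5.80 - x - $0.85 = $4 should give $5.80 - x = $4.85, not $3.15, and the PRM flags exactly that step while every other step scores above 0.94.

For reranking several sampled solutions, the per-step rewards are typically collapsed into a single score. The snippet below is a minimal sketch of two common aggregation choices, reusing the step_rewards list computed above; aggregate_step_rewards and its "min"/"prod" options are illustrative names for this sketch, not part of this model's code.

import math
from typing import List

def aggregate_step_rewards(step_rewards: List[float], how: str = "min") -> float:
    # collapse per-step PRM scores into one solution-level score
    if how == "min":
        # score of the weakest step: one bad step sinks the whole solution
        return min(step_rewards)
    if how == "prod":
        # product of step scores, i.e. the chance that every step is correct
        # under an independence assumption
        return math.prod(step_rewards)
    raise ValueError(f"unknown aggregation: {how}")

print(aggregate_step_rewards(step_rewards, "min"))   # ~0.52 for the example above
print(aggregate_step_rewards(step_rewards, "prod"))  # ~0.49 for the example above

A higher aggregated score can then be used to pick the best of several sampled responses.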
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# # [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
get step positionspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List

def prepare_input(query: str, steps: List[str], tokenizer: AutoTokenizer, correct_token: str, generation_prompt: str):
    messages = []
    
    for s_idx, step in enumerate(steps):
        if s_idx == 0:
            # append query and first step
            message = {'role': 'user', 'content':  query + " " + step + " " + generation_prompt}
        else:
            message = {'role': 'user', 'content':  step + " " + generation_prompt}
        
        messages.append(message)
        messages.append({'role': 'assistant', 'content': correct_token})
    
    input_message = tokenizer.apply_chat_template(messages, add_generation_prompt = False, tokenize = False)

    return input_message

def get_step_ids(input_ids, tokenizer, correct_token, correct_token_id):
    # get assistant turn indices
    asst_text = "<|start_of_role|>assistant<|end_of_role|>" + correct_token + "<|end_of_text|>"
    asst_toks = tokenizer(asst_text, add_special_tokens = False, return_tensors = "pt")['input_ids'][0]
    asst_toks_before_correct_token = asst_toks[:torch.where(asst_toks == correct_token_id)[0].item()].tolist()

    input_ids = input_ids[0]
    # find batch index for assistant turn "Y", not just the correct_token_id
    correct_token_indices = torch.where(input_ids == correct_token_id)[0].tolist()
    prm_indices = []
    for t_idx in correct_token_indices:
        if input_ids[t_idx - len(asst_toks_before_correct_token) :t_idx].tolist() == asst_toks_before_correct_token:
            prm_indices.append(t_idx-1) # the logits for token i predict the token i+1: so, we need to look at the PREVIOUS token logits
    
    assert len(prm_indices)>0
    return prm_indices

model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

correct_token = "Y"
correct_token_id = tokenizer.encode(correct_token, add_special_tokens=False)[0]
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "response":[
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85.",
        "At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80.",
        "To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
        "We are given that the difference is $4, so we can write: $5.80 - (x + $0.85) = $4.",
        "Simplifying the left side, we get: $5.80 - x - $0.85 = $4.",
        "Adding -$0.85 to both sides, we get: $5.80 -x = $3.15.",
        "Subtracting $5.80 from both sides, we get: -x = -$2.65.",
        "Dividing both sides by -1, we get: x = $2.65."
   ]
}


formatted_data = prepare_input(query=data['query'], steps=data['response'], tokenizer=tokenizer, correct_token=correct_token, generation_prompt=generation_prompt)
input_ids = tokenizer.encode(formatted_data, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(input_ids=input_ids).logits

# get step positions
prm_indices = get_step_ids(input_ids, tokenizer, correct_token, correct_token_id)

#  get corresponding rewards: convert logits to probabilities and get the probability of the correct token id as reward
softmax = torch.nn.Softmax(dim=-1)
step_rewards = []
for prm_idx in prm_indices:
    step_rewards.append(softmax(logits[0, prm_idx, :])[correct_token_id].item())

print(step_rewards)
# [0.9998785257339478, 0.9996663331985474, 0.9991942048072815, 0.9993413090705872, 0.9996351003646851, 0.519490122795105, 0.9416136145591736, 0.9942548871040344]
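The step-level rewards above can also be collapsed into a single score per candidate solution, which is useful when reranking several sampled generations (best-of-n). The helper below is a minimal sketch of that aggregation and is not part of the model card; `aggregate_rewards` and the choice of aggregation (minimum, product, or last step) are illustrative assumptions.

# Minimal sketch (illustrative, not from the model card): collapse per-step
# rewards into one response-level score, e.g. for best-of-n reranking.
def aggregate_rewards(step_rewards, how="min"):
    if how == "min":
        # score is bounded by the weakest step, a common PRM aggregation
        return min(step_rewards)
    if how == "prod":
        # product of the per-step probabilities
        score = 1.0
        for r in step_rewards:
            score *= r
        return score
    if how == "last":
        # probability assigned to the final step only
        return step_rewards[-1]
    raise ValueError(f"unknown aggregation: {how}")

# using the step_rewards list computed above; the weak step (~0.52) dominates under "min"
print(aggregate_rewards(step_rewards, how="min"))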
format the prompts (python, transformers)
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
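The generation-based usage returns the judgment as text. If a boolean or score is needed downstream, the decoded output can be post-processed; the snippet below is a minimal sketch that continues from the variables above and assumes the model answers with a leading "Y" or "N", as in the example output.

# Minimal sketch (assumption, continues from the snippet above): keep only the
# newly generated tokens and map the Y/N judgment to a boolean.
new_tokens = response[0][inputs["input_ids"].shape[1]:]
judgment = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
is_correct_so_far = judgment.upper().startswith("Y")
print(judgment, is_correct_so_far)
# e.g. Y True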
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>
format the promptspythontransformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


model_name_or_path = "ibm-granite/granite-3.3-8b-lora-math-prm"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
generation_prompt = "Is this response correct so far (Y/N)?"


data = {
   "query": "For breakfast, Anna bought a bagel for $x and a glass of orange juice for $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. How much more money did Anna spend on lunch than on breakfast? If we know the answer to the above question is 4, what is the value of unknown variable x?",
   "partial_response":
        "At breakfast, Anna spent x dollars on a bagel and $0.85 on a glass of orange juice. The total cost of breakfast is x + $0.85. At lunch, Anna spent $4.65 on a sandwich and $1.15 on a carton of milk. The total cost of lunch is $4.65 + $1.15 = $5.80. To find out how much more money Anna spent on lunch than on breakfast, we subtract the cost of breakfast from the cost of lunch: $5.80 - (x + $0.85).",
}

# format the prompts
formatted_prompt = tokenizer.apply_chat_template([{'role':'user', 'content': data['query'] + " " + data['partial_response'] + " " + generation_prompt}], add_generation_prompt=True, tokenize=False)
inputs = tokenizer(formatted_prompt, return_tensors="pt")

# generate output
with torch.no_grad():
    response = model.generate(inputs["input_ids"].to(model.device), attention_mask=inputs["attention_mask"].to(model.device), max_new_tokens=2)

output_text = tokenizer.decode(response[0])
print(output_text)
# # <|start_of_role|>assistant<|end_of_role|>Y<|end_of_text|>

Deploy This Model

Production-ready deployment in minutes.

Together.ai — instant API access to this model. Production-ready inference API; start free, scale to millions.

Replicate — one-click model deployment. Run models in the cloud with a simple API; no DevOps required.

Disclosure: We may earn a commission from these partners. This helps keep LLMYourWay free.